郭金玉 - HRNet - Handover Document

Preface

In the month since I joined, my main responsibility has been optimizing HRNet's 2D model: replacing the face keypoints with tooth keypoints and detecting them accurately.

Personal Understanding

HRNet is a detection network for human pose estimation and facial landmarks. Its stem uses two convolution layers for 4x downsampling, followed by residually connected Bottleneck blocks: as in ResNet, a 1x1 convolution reduces the 256-channel features to 64 channels, a 3x3 convolution operates at 64 channels, and another 1x1 convolution expands back to 256 channels, which cuts the computation. Compared with other networks, HRNet achieves excellent accuracy on facial keypoint detection.
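To make the computation saving concrete, here is a small illustrative sketch (not part of the handover code) comparing the parameter count of a plain 3x3 convolution at 256 channels with the 256 -> 64 -> 256 bottleneck pattern described above:

import torch.nn as nn

# Plain 3x3 convolution operating directly on 256 channels.
plain = nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False)

# Bottleneck pattern: 1x1 reduce to 64, 3x3 at 64, 1x1 expand back to 256.
bottleneck = nn.Sequential(
    nn.Conv2d(256, 64, kernel_size=1, bias=False),
    nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False),
    nn.Conv2d(64, 256, kernel_size=1, bias=False),
)

def count(m):
    return sum(p.numel() for p in m.parameters())

print(count(plain))       # 589824 parameters
print(count(bottleneck))  # 69632 parameters

The full model definition used in the project (hrnet1.py) follows.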

# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Created by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Tianheng Cheng(tianhengcheng@gmail.com), Yang Zhao
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import logging

import torch
import torch.nn as nn
import torch.nn.functional as F


BatchNorm2d = nn.BatchNorm2d
BN_MOMENTUM = 0.01
logger = logging.getLogger(__name__)


def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3,
stride=stride, padding=1, bias=False)


class BasicBlock(nn.Module):
expansion = 1

def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.downsample = downsample
self.stride = stride

def forward(self, x):
residual = x

out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)

out = self.conv2(out)
out = self.bn2(out)

if self.downsample is not None:
residual = self.downsample(x)

out += residual
out = self.relu(out)

return out


class Bottleneck(nn.Module):
expansion = 4

def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = BatchNorm2d(planes * self.expansion,
momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride

def forward(self, x):
residual = x

out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)

out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)

out = self.conv3(out)
out = self.bn3(out)

if self.downsample is not None:
residual = self.downsample(x)

out += residual
out = self.relu(out)

return out


class HighResolutionModule(nn.Module):
def __init__(self, num_branches, blocks, num_blocks, num_inchannels,
num_channels, fuse_method, multi_scale_output=True):
super(HighResolutionModule, self).__init__()
self._check_branches(
num_branches, blocks, num_blocks, num_inchannels, num_channels)

self.num_inchannels = num_inchannels
self.fuse_method = fuse_method
self.num_branches = num_branches

self.multi_scale_output = multi_scale_output

self.branches = self._make_branches(
num_branches, blocks, num_blocks, num_channels)
self.fuse_layers = self._make_fuse_layers()
self.relu = nn.ReLU(inplace=True)

def _check_branches(self, num_branches, blocks, num_blocks,
num_inchannels, num_channels):
if num_branches != len(num_blocks):
error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format(
num_branches, len(num_blocks))
logger.error(error_msg)
raise ValueError(error_msg)

if num_branches != len(num_channels):
error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format(
num_branches, len(num_channels))
logger.error(error_msg)
raise ValueError(error_msg)

if num_branches != len(num_inchannels):
error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format(
num_branches, len(num_inchannels))
logger.error(error_msg)
raise ValueError(error_msg)

def _make_one_branch(self, branch_index, block, num_blocks, num_channels,
stride=1):
downsample = None
if stride != 1 or \
self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.num_inchannels[branch_index],
num_channels[branch_index] * block.expansion,
kernel_size=1, stride=stride, bias=False),
BatchNorm2d(num_channels[branch_index] * block.expansion,
momentum=BN_MOMENTUM),
)

layers = []
layers.append(block(self.num_inchannels[branch_index],
num_channels[branch_index], stride, downsample))
self.num_inchannels[branch_index] = \
num_channels[branch_index] * block.expansion
for i in range(1, num_blocks[branch_index]):
layers.append(block(self.num_inchannels[branch_index],
num_channels[branch_index]))

return nn.Sequential(*layers)

def _make_branches(self, num_branches, block, num_blocks, num_channels):
branches = []

for i in range(num_branches):
branches.append(
self._make_one_branch(i, block, num_blocks, num_channels))

return nn.ModuleList(branches)

def _make_fuse_layers(self):
if self.num_branches == 1:
return None

num_branches = self.num_branches
num_inchannels = self.num_inchannels
fuse_layers = []
for i in range(num_branches if self.multi_scale_output else 1):
fuse_layer = []
for j in range(num_branches):
if j > i:
fuse_layer.append(nn.Sequential(
nn.Conv2d(num_inchannels[j],
num_inchannels[i],
1,
1,
0,
bias=False),
BatchNorm2d(num_inchannels[i], momentum=BN_MOMENTUM)))
# nn.Upsample(scale_factor=2**(j-i), mode='nearest')))
elif j == i:
fuse_layer.append(None)
else:
conv3x3s = []
for k in range(i - j):
if k == i - j - 1:
num_outchannels_conv3x3 = num_inchannels[i]
conv3x3s.append(nn.Sequential(
nn.Conv2d(num_inchannels[j],
num_outchannels_conv3x3,
3, 2, 1, bias=False),
BatchNorm2d(num_outchannels_conv3x3, momentum=BN_MOMENTUM)))
else:
num_outchannels_conv3x3 = num_inchannels[j]
conv3x3s.append(nn.Sequential(
nn.Conv2d(num_inchannels[j],
num_outchannels_conv3x3,
3, 2, 1, bias=False),
BatchNorm2d(num_outchannels_conv3x3,
momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)))
fuse_layer.append(nn.Sequential(*conv3x3s))
fuse_layers.append(nn.ModuleList(fuse_layer))

return nn.ModuleList(fuse_layers)

def get_num_inchannels(self):
return self.num_inchannels

def forward(self, x):
if self.num_branches == 1:
return [self.branches[0](x[0])]

for i in range(self.num_branches):
x[i] = self.branches[i](x[i])

x_fuse = []
for i in range(len(self.fuse_layers)):
y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
for j in range(1, self.num_branches):
if i == j:
y = y + x[j]
elif j > i:
y = y + F.interpolate(
self.fuse_layers[i][j](x[j]),
size=[x[i].shape[2], x[i].shape[3]],
mode='bilinear')
else:
y = y + self.fuse_layers[i][j](x[j])
x_fuse.append(self.relu(y))

return x_fuse


blocks_dict = {
'BASIC': BasicBlock,
'BOTTLENECK': Bottleneck
}


class HighResolutionNet(nn.Module):

def __init__(self, config, **kwargs):
self.inplanes = 64
extra = config.MODEL.EXTRA
super(HighResolutionNet, self).__init__()

# stem net
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1,
bias=False)
self.bn1 = BatchNorm2d(64, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1,
bias=False)
self.bn2 = BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.sf = nn.Softmax(dim=1)
self.layer1 = self._make_layer(Bottleneck, 64, 64, 4)

self.stage2_cfg = extra['STAGE2']
num_channels = self.stage2_cfg['NUM_CHANNELS']
block = blocks_dict[self.stage2_cfg['BLOCK']]
num_channels = [
num_channels[i] * block.expansion for i in range(len(num_channels))]
self.transition1 = self._make_transition_layer(
[256], num_channels)
self.stage2, pre_stage_channels = self._make_stage(
self.stage2_cfg, num_channels)

self.stage3_cfg = extra['STAGE3']
num_channels = self.stage3_cfg['NUM_CHANNELS']
block = blocks_dict[self.stage3_cfg['BLOCK']]
num_channels = [
num_channels[i] * block.expansion for i in range(len(num_channels))]
self.transition2 = self._make_transition_layer(
pre_stage_channels, num_channels)
self.stage3, pre_stage_channels = self._make_stage(
self.stage3_cfg, num_channels)

self.stage4_cfg = extra['STAGE4']
num_channels = self.stage4_cfg['NUM_CHANNELS']
block = blocks_dict[self.stage4_cfg['BLOCK']]
num_channels = [
num_channels[i] * block.expansion for i in range(len(num_channels))]
self.transition3 = self._make_transition_layer(
pre_stage_channels, num_channels)
self.stage4, pre_stage_channels = self._make_stage(
self.stage4_cfg, num_channels, multi_scale_output=True)

final_inp_channels = sum(pre_stage_channels)

self.head = nn.Sequential(
nn.Conv2d(
in_channels=final_inp_channels,
out_channels=final_inp_channels,
kernel_size=1,
stride=1,
padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0),
BatchNorm2d(final_inp_channels, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True),
nn.Conv2d(
in_channels=final_inp_channels,
out_channels=config.MODEL.NUM_JOINTS,
kernel_size=extra.FINAL_CONV_KERNEL,
stride=1,
padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0)
)

def _make_transition_layer(
self, num_channels_pre_layer, num_channels_cur_layer):
num_branches_cur = len(num_channels_cur_layer)
num_branches_pre = len(num_channels_pre_layer)

transition_layers = []
for i in range(num_branches_cur):
if i < num_branches_pre:
if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
transition_layers.append(nn.Sequential(
nn.Conv2d(num_channels_pre_layer[i],
num_channels_cur_layer[i],
3,
1,
1,
bias=False),
BatchNorm2d(
num_channels_cur_layer[i], momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)))
else:
transition_layers.append(None)
else:
conv3x3s = []
for j in range(i + 1 - num_branches_pre):
inchannels = num_channels_pre_layer[-1]
outchannels = num_channels_cur_layer[i] \
if j == i - num_branches_pre else inchannels
conv3x3s.append(nn.Sequential(
nn.Conv2d(
inchannels, outchannels, 3, 2, 1, bias=False),
BatchNorm2d(outchannels, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)))
transition_layers.append(nn.Sequential(*conv3x3s))

return nn.ModuleList(transition_layers)

def _make_layer(self, block, inplanes, planes, blocks, stride=1):
downsample = None
if stride != 1 or inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
)

layers = []
layers.append(block(inplanes, planes, stride, downsample))
inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(inplanes, planes))

return nn.Sequential(*layers)

def _make_stage(self, layer_config, num_inchannels,
multi_scale_output=True):
num_modules = layer_config['NUM_MODULES']
num_branches = layer_config['NUM_BRANCHES']
num_blocks = layer_config['NUM_BLOCKS']
num_channels = layer_config['NUM_CHANNELS']
block = blocks_dict[layer_config['BLOCK']]
fuse_method = layer_config['FUSE_METHOD']

modules = []
for i in range(num_modules):
# multi_scale_output is only used in the last module
if not multi_scale_output and i == num_modules - 1:
reset_multi_scale_output = False
else:
reset_multi_scale_output = True
modules.append(
HighResolutionModule(num_branches,
block,
num_blocks,
num_inchannels,
num_channels,
fuse_method,
reset_multi_scale_output)
)
num_inchannels = modules[-1].get_num_inchannels()

return nn.Sequential(*modules), num_inchannels

def forward(self, x):
# h, w = x.size(2), x.size(3)
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu(x)
x = self.layer1(x)

x_list = []
for i in range(self.stage2_cfg['NUM_BRANCHES']):
if self.transition1[i] is not None:
x_list.append(self.transition1[i](x))
else:
x_list.append(x)
y_list = self.stage2(x_list)

x_list = []
for i in range(self.stage3_cfg['NUM_BRANCHES']):
if self.transition2[i] is not None:
x_list.append(self.transition2[i](y_list[-1]))
else:
x_list.append(y_list[i])
y_list = self.stage3(x_list)

x_list = []
for i in range(self.stage4_cfg['NUM_BRANCHES']):
if self.transition3[i] is not None:
x_list.append(self.transition3[i](y_list[-1]))
else:
x_list.append(y_list[i])
x = self.stage4(x_list)

# Head Part
height, width = x[0].size(2), x[0].size(3)
x1 = F.interpolate(x[1], size=(height, width), mode='bilinear', align_corners=False)
x2 = F.interpolate(x[2], size=(height, width), mode='bilinear', align_corners=False)
x3 = F.interpolate(x[3], size=(height, width), mode='bilinear', align_corners=False)
x = torch.cat([x[0], x1, x2, x3], 1)
x = self.head(x)

return x

def init_weights(self, pretrained=''):
logger.info('=> init weights from normal distribution')
for m in self.modules():
if isinstance(m, nn.Conv2d):
# nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
nn.init.normal_(m.weight, std=0.001)
# nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
if os.path.isfile(pretrained):
pretrained_dict = torch.load(pretrained)
logger.info('=> loading pretrained model {}'.format(pretrained))
model_dict = self.state_dict()
pretrained_dict = {k: v for k, v in pretrained_dict.items()
if k in model_dict.keys()}
for k, _ in pretrained_dict.items():
logger.info(
'=> loading {} pretrained model {}'.format(k, pretrained))
model_dict.update(pretrained_dict)
self.load_state_dict(model_dict)


def get_face_alignment_net(config, **kwargs):

model = HighResolutionNet(config, **kwargs)
pretrained = config.MODEL.PRETRAINED if config.MODEL.INIT_WEIGHTS else ''
model.init_weights(pretrained=pretrained)

return model

# -*- coding: utf-8 -*-
# @Time : 2022/11/28 15:46
# @Author : admin
# @Email : wayne_lau@aliyun.com
# @File : hrnet1.py
# @Project : HRNet-Facial-Landmark-Detection-master
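As a quick sanity check of the network defined above, the following sketch (an illustration, not part of the handover code; it assumes `config` is a loaded configuration with the defaults given later in this document) verifies the output resolution. The two stride-2 stem convolutions downsample the 512x512 input by 4x, so the head emits NUM_JOINTS = 16 heatmaps at 128x128:

import torch

# `config` is assumed to be a yacs config as defined further below.
model = get_face_alignment_net(config)
model.eval()
with torch.no_grad():
    out = model(torch.randn(1, 3, 512, 512))
print(out.shape)  # expected: torch.Size([1, 16, 128, 128])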

The code itself is structurally fairly complex.

Progress

After receiving the official source code and data, I configured the environment, installed the required packages, and got the code running normally.
Next I studied the code to understand what each function does and what each part of the pipeline handles. I then built our own dataset by imitating the format of the official dataset, and adjusted the network parameters and the required configuration.
Finally, I began training the model and optimizing it.
For details, see: https://up3dai.yuque.com/ionwb3/reports/dmfltk5cwmrt7wf9

Personal Regrets

The work I had expected before coming to the company never materialized, and I was moved on to other tasks, so progress was slower than hoped. Not managing to finish this work completely is a personal regret. Even so, I gained a lot along the way: it helped me grow a little, and handling the new data taught me new things.

Message to the Team

In my first month on the job, I could feel that everyone on our team is friendly: whenever I did not understand something, people would teach me, and whenever I needed help they did whatever they could. Our group also keeps plenty of learning materials worth reading in spare moments.
Finally, I hope everyone in the AI group finishes their own projects without regrets. Keep it up!

Related File Locations

E:\work
All related files are inside the work folder.

Code Change Locations

# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Ke Sun (sunk@mail.ustc.edu.cn), Tianheng Cheng(tianhengcheng@gmail.com)
# ------------------------------------------------------------------------------
#郭金玉
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from yacs.config import CfgNode as CN


_C = CN()

_C.OUTPUT_DIR = 'output'
_C.LOG_DIR = 'log'
_C.GPUS = (0, 0)
_C.WORKERS = 16
_C.PRINT_FREQ = 20
_C.AUTO_RESUME = False
_C.PIN_MEMORY = True

# Cudnn related params
_C.CUDNN = CN()
_C.CUDNN.BENCHMARK = True
_C.CUDNN.DETERMINISTIC = False
_C.CUDNN.ENABLED = True

# common params for NETWORK
_C.MODEL = CN()
_C.MODEL.NAME = 'hrnet'
_C.MODEL.INIT_WEIGHTS = True
_C.MODEL.PRETRAINED = ''
_C.MODEL.NUM_JOINTS = 16
_C.MODEL.TARGET_TYPE = 'Gaussian'
_C.MODEL.IMAGE_SIZE = [512, 512] # width * height
_C.MODEL.HEATMAP_SIZE = [128, 128] # width * height
_C.MODEL.SIGMA = 1.0
_C.MODEL.EXTRA = CN()

# High-Resolution Net
_C.MODEL.EXTRA.PRETRAINED_LAYERS = ['*']
_C.MODEL.EXTRA.STEM_INPLANES = 128
_C.MODEL.EXTRA.FINAL_CONV_KERNEL = 1
_C.MODEL.EXTRA.WITH_HEAD = True

_C.MODEL.EXTRA.STAGE2 = CN()
_C.MODEL.EXTRA.STAGE2.NUM_MODULES = 1
_C.MODEL.EXTRA.STAGE2.NUM_BRANCHES = 2
_C.MODEL.EXTRA.STAGE2.NUM_BLOCKS = [4, 4]
_C.MODEL.EXTRA.STAGE2.NUM_CHANNELS = [32, 64]
_C.MODEL.EXTRA.STAGE2.BLOCK = 'BASIC'
_C.MODEL.EXTRA.STAGE2.FUSE_METHOD = 'SUM'

_C.MODEL.EXTRA.STAGE3 = CN()
_C.MODEL.EXTRA.STAGE3.NUM_MODULES = 1
_C.MODEL.EXTRA.STAGE3.NUM_BRANCHES = 3
_C.MODEL.EXTRA.STAGE3.NUM_BLOCKS = [4, 4, 4]
_C.MODEL.EXTRA.STAGE3.NUM_CHANNELS = [32, 64, 128]
_C.MODEL.EXTRA.STAGE3.BLOCK = 'BASIC'
_C.MODEL.EXTRA.STAGE3.FUSE_METHOD = 'SUM'

_C.MODEL.EXTRA.STAGE4 = CN()
_C.MODEL.EXTRA.STAGE4.NUM_MODULES = 1
_C.MODEL.EXTRA.STAGE4.NUM_BRANCHES = 4
_C.MODEL.EXTRA.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
_C.MODEL.EXTRA.STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
_C.MODEL.EXTRA.STAGE4.BLOCK = 'BASIC'
_C.MODEL.EXTRA.STAGE4.FUSE_METHOD = 'SUM'

# DATASET related params
_C.DATASET = CN()
_C.DATASET.ROOT = ''
_C.DATASET.DATASET = '300w'
_C.DATASET.TRAINSET = ''
_C.DATASET.TESTSET = ''

# training data augmentation
_C.DATASET.FLIP = True
_C.DATASET.SCALE_FACTOR = 1.0

_C.DATASET.ROT_FACTOR = 30

# train
_C.TRAIN = CN()

_C.TRAIN.LR_FACTOR = 0.1
_C.TRAIN.LR_STEP = [15, 25, 35]
_C.TRAIN.LR = 0.0001

_C.TRAIN.OPTIMIZER = 'adam'
_C.TRAIN.MOMENTUM = 0.9
_C.TRAIN.WD = 0.0
_C.TRAIN.NESTEROV = False

_C.TRAIN.BEGIN_EPOCH = 0
_C.TRAIN.END_EPOCH = 30

_C.TRAIN.RESUME = True
_C.TRAIN.CHECKPOINT = ''

_C.TRAIN.BATCH_SIZE_PER_GPU = 0
_C.TRAIN.SHUFFLE = True

# testing
_C.TEST = CN()

# size of images for each device
_C.TEST.BATCH_SIZE_PER_GPU = 0


def update_config(cfg, args):
cfg.defrost()
cfg.merge_from_file(args.cfg)
cfg.freeze()


if __name__ == '__main__':
import sys
with open(sys.argv[1], 'w') as f:
print(_C, file=f)

# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Ke Sun (sunk@mail.ustc.edu.cn), Tianheng Cheng(tianhengcheng@gmail.com)
# ------------------------------------------------------------------------------
#
# from __future__ import absolute_import
# from __future__ import division
# from __future__ import print_function
#
# from yacs.config import CfgNode as CN
#
#
# _C = CN()
#
# _C.OUTPUT_DIR = 'output'
# _C.LOG_DIR = 'log'
# _C.GPUS = (0, 0)
# _C.WORKERS = 16
# _C.PRINT_FREQ = 20
# _C.AUTO_RESUME = False
# _C.PIN_MEMORY = True
#
# # Cudnn related params
# _C.CUDNN = CN()
# _C.CUDNN.BENCHMARK = True
# _C.CUDNN.DETERMINISTIC = False
# _C.CUDNN.ENABLED = True
#
# # common params for NETWORK
# _C.MODEL = CN()
# _C.MODEL.NAME = 'hrnet'
# _C.MODEL.INIT_WEIGHTS = True
# _C.MODEL.PRETRAINED = ''
# _C.MODEL.NUM_JOINTS = 17
# _C.MODEL.TARGET_TYPE = 'Gaussian'
# _C.MODEL.IMAGE_SIZE = [256, 256] # width * height
# _C.MODEL.HEATMAP_SIZE = [64, 64] # width * height
# _C.MODEL.SIGMA = 1.5
# _C.MODEL.EXTRA = CN()
#
# # High-Resoluion Net
# _C.MODEL.EXTRA.PRETRAINED_LAYERS = ['*']
# _C.MODEL.EXTRA.STEM_INPLANES = 64
# _C.MODEL.EXTRA.FINAL_CONV_KERNEL = 1
# _C.MODEL.EXTRA.WITH_HEAD = True
#
# _C.MODEL.EXTRA.STAGE2 = CN()
# _C.MODEL.EXTRA.STAGE2.NUM_MODULES = 1
# _C.MODEL.EXTRA.STAGE2.NUM_BRANCHES = 2
# _C.MODEL.EXTRA.STAGE2.NUM_BLOCKS = [4, 4]
# _C.MODEL.EXTRA.STAGE2.NUM_CHANNELS = [18, 36]
# _C.MODEL.EXTRA.STAGE2.BLOCK = 'BASIC'
# _C.MODEL.EXTRA.STAGE2.FUSE_METHOD = 'SUM'
#
# _C.MODEL.EXTRA.STAGE3 = CN()
# _C.MODEL.EXTRA.STAGE3.NUM_MODULES = 1
# _C.MODEL.EXTRA.STAGE3.NUM_BRANCHES = 3
# _C.MODEL.EXTRA.STAGE3.NUM_BLOCKS = [4, 4, 4]
# _C.MODEL.EXTRA.STAGE3.NUM_CHANNELS = [18, 36, 72]
# _C.MODEL.EXTRA.STAGE3.BLOCK = 'BASIC'
# _C.MODEL.EXTRA.STAGE3.FUSE_METHOD = 'SUM'
#
# _C.MODEL.EXTRA.STAGE4 = CN()
# _C.MODEL.EXTRA.STAGE4.NUM_MODULES = 1
# _C.MODEL.EXTRA.STAGE4.NUM_BRANCHES = 4
# _C.MODEL.EXTRA.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
# _C.MODEL.EXTRA.STAGE4.NUM_CHANNELS = [18, 32, 72, 144]
# _C.MODEL.EXTRA.STAGE4.BLOCK = 'BASIC'
# _C.MODEL.EXTRA.STAGE4.FUSE_METHOD = 'SUM'
#
# # DATASET related params
# _C.DATASET = CN()
# _C.DATASET.ROOT = ''
# _C.DATASET.DATASET = 'AFLW'
# _C.DATASET.TRAINSET = ''
# _C.DATASET.TESTSET = ''
#
# # training data augmentation
# _C.DATASET.FLIP = True
# _C.DATASET.SCALE_FACTOR = 0.25
# _C.DATASET.ROT_FACTOR = 30
#
# # train
# _C.TRAIN = CN()
#
# _C.TRAIN.LR_FACTOR = 0.1
# _C.TRAIN.LR_STEP = [30, 50]
# _C.TRAIN.LR = 0.0001
#
# _C.TRAIN.OPTIMIZER = 'adam'
# _C.TRAIN.MOMENTUM = 0.0
# _C.TRAIN.WD = 0.0
# _C.TRAIN.NESTEROV = False
#
# _C.TRAIN.BEGIN_EPOCH = 0
# _C.TRAIN.END_EPOCH = 60
#
# _C.TRAIN.RESUME = True
# _C.TRAIN.CHECKPOINT = ''
#
# _C.TRAIN.BATCH_SIZE_PER_GPU = 0
# _C.TRAIN.SHUFFLE = True
#
# # testing
# _C.TEST = CN()
#
# # size of images for each device
# _C.TEST.BATCH_SIZE_PER_GPU = 1
#
#
# def update_config(cfg, args):
# cfg.defrost()
# cfg.merge_from_file(args.cfg)
# cfg.freeze()
#
#
# if __name__ == '__main__':
# import sys
# with open(sys.argv[1], 'w') as f:
# print(_C, file=f)
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Created by Tianheng Cheng(tianhengcheng@gmail.com), Yang Zhao
# ------------------------------------------------------------------------------
#郭金玉
import math

import torch
import numpy as np

from ..utils.transforms import transform_preds


def get_preds(scores):
"""
get predictions from score maps in torch Tensor
return type: torch.LongTensor
"""
assert scores.dim() == 4, 'Score maps should be 4-dim'
maxval, idx = torch.max(scores.view(scores.size(0), scores.size(1), -1), 2)

maxval = maxval.view(scores.size(0), scores.size(1), 1)
idx = idx.view(scores.size(0), scores.size(1), 1) + 1

preds = idx.repeat(1, 1, 2).float()

preds[:, :, 0] = (preds[:, :, 0] - 1) % scores.size(3) + 1
preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / scores.size(3)) + 1

pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
preds *= pred_mask
return preds


def compute_nme(preds, meta):

targets = meta['pts']
preds = preds.numpy()
target = targets.cpu().numpy()

N = preds.shape[0]
L = preds.shape[1]
rmse = np.zeros(N)

# **** disabled ('baocuo' = it raised errors): the landmark counts below
# (19/29/68/98) do not match the tooth keypoint set, so rmse stays all zeros.
# for i in range(N):
# pts_pred, pts_gt = preds[i, ], target[i, ]
# if L == 19: # aflw
# interocular = meta['box_size'][i]
# elif L == 29: # cofw
# interocular = np.linalg.norm(pts_gt[8, ] - pts_gt[9, ])
# elif L == 68: # 300w
# # interocular
# interocular = np.linalg.norm(pts_gt[36, ] - pts_gt[45, ])
# elif L == 98:
# interocular = np.linalg.norm(pts_gt[60, ] - pts_gt[72, ])
# else:
# raise ValueError('Number of landmarks is wrong')
# rmse[i] = np.sum(np.linalg.norm(pts_pred - pts_gt, axis=1)) / (interocular * L)

return rmse


def decode_preds(output, center, scale, res):
coords = get_preds(output) # float type

coords = coords.cpu()
# pose-processing
for n in range(coords.size(0)):
for p in range(coords.size(1)):
hm = output[n][p]
px = int(math.floor(coords[n][p][0]))
py = int(math.floor(coords[n][p][1]))
if (px > 1) and (px < res[0]) and (py > 1) and (py < res[1]):
diff = torch.Tensor([hm[py - 1][px] - hm[py - 1][px - 2], hm[py][px - 1]-hm[py - 2][px - 1]])
coords[n][p] += diff.sign() * .25
coords += 0.5
preds = coords.clone()

# Transform back
for i in range(coords.size(0)):
preds[i] = transform_preds(coords[i], center[i], scale[i], res)

if preds.dim() < 3:
preds = preds.view(1, *preds.size())  # fixed: unpack the size tuple for view()

return preds
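A quick sanity check of get_preds (a sketch, assuming the function above is in scope): the peak of each score map comes back as 1-based (x, y) coordinates:

import torch

scores = torch.zeros(1, 2, 4, 4)  # batch=1, 2 keypoints, 4x4 heatmaps
scores[0, 0, 1, 2] = 1.0          # peak at row 1, col 2 -> (x=3, y=2), 1-based
scores[0, 1, 3, 0] = 1.0          # peak at row 3, col 0 -> (x=1, y=4)
print(get_preds(scores))          # tensor([[[3., 2.], [1., 4.]]])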
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Created by Tianheng Cheng(tianhengcheng@gmail.com)
# ------------------------------------------------------------------------------
#郭金玉
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time
import logging

import torch
import numpy as np

from .evaluation import decode_preds, compute_nme

logger = logging.getLogger(__name__)


class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
self.reset()

def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0

def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count


def train(config, train_loader, model, criterion, optimizer,
epoch, writer_dict):

batch_time = AverageMeter()
data_time = AverageMeter()
losses = AverageMeter()

model.train()
nme_count = 0
nme_batch_sum = 0

end = time.time()

for i, (inp, target, meta) in enumerate(train_loader):
# measure data loading time
data_time.update(time.time()-end)
# inp = inp[:, :, :512, :512]  # Yang
print(inp.shape, target.shape)
# compute the output

output = model(inp)
print(output.shape)
target = target.cuda(non_blocking=True)


loss = criterion(output, target)
print('-----------------------',loss)
# NME
score_map = output.data.cpu()
preds = decode_preds(score_map, meta['center'], meta['scale'], [128, 128])
print('preds-------------------------',preds.shape)
nme_batch = compute_nme(preds, meta)
nme_batch_sum = nme_batch_sum + np.sum(nme_batch)
nme_count = nme_count + preds.size(0)

# optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()

losses.update(loss.item(), inp.size(0))

batch_time.update(time.time()-end)
if i % config.PRINT_FREQ == 0:
msg = 'Epoch: [{0}][{1}/{2}]\t' \
'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
'Speed {speed:.1f} samples/s\t' \
'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
'Loss {loss.val:.5f} ({loss.avg:.5f})\t'.format(
epoch, i, len(train_loader), batch_time=batch_time,
speed=inp.size(0)/batch_time.val,
data_time=data_time, loss=losses)
logger.info(msg)

if writer_dict:
writer = writer_dict['writer']
global_steps = writer_dict['train_global_steps']
writer.add_scalar('train_loss', losses.val, global_steps)
writer_dict['train_global_steps'] = global_steps + 1

end = time.time()
nme = nme_batch_sum / nme_count
msg = 'Train Epoch {} time:{:.4f} loss:{:.4f} nme:{:.4f}'\
.format(epoch, batch_time.avg, losses.avg, nme)
logger.info(msg)


def validate(config, val_loader, model, criterion, epoch, writer_dict):
batch_time = AverageMeter()
data_time = AverageMeter()

losses = AverageMeter()

num_classes = config.MODEL.NUM_JOINTS
predictions = torch.zeros((len(val_loader.dataset), num_classes, 2))

model.eval()

nme_count = 0
nme_batch_sum = 0
count_failure_008 = 0
count_failure_010 = 0
end = time.time()

with torch.no_grad():
for i, (inp, target, meta) in enumerate(val_loader):
data_time.update(time.time() - end)

output = model(inp)
target = target.cuda(non_blocking=True)
score_map = output.data.cpu()
print("#########/n",output,target,score_map)

# loss

loss = criterion(output, target)

preds = decode_preds(score_map, meta['center'], meta['scale'], [128, 128])
# NME
nme_temp = compute_nme(preds, meta)
# Failure rate under different thresholds
failure_008 = (nme_temp > 0.08).sum()
failure_010 = (nme_temp > 0.10).sum()
count_failure_008 += failure_008
count_failure_010 += failure_010

nme_batch_sum += np.sum(nme_temp)
nme_count = nme_count + preds.size(0)
for n in range(score_map.size(0)):
predictions[meta['index'][n], :, :] = preds[n, :, :]

losses.update(loss.item(), inp.size(0))

# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()

nme = nme_batch_sum / nme_count
failure_008_rate = count_failure_008 / nme_count
failure_010_rate = count_failure_010 / nme_count

msg = 'Test Epoch {} time:{:.4f} loss:{:.4f} nme:{:.4f} [008]:{:.4f} ' \
'[010]:{:.4f}'.format(epoch, batch_time.avg, losses.avg, nme,
failure_008_rate, failure_010_rate)
logger.info(msg)

if writer_dict:
writer = writer_dict['writer']
global_steps = writer_dict['valid_global_steps']
writer.add_scalar('valid_loss', losses.avg, global_steps)
writer.add_scalar('valid_nme', nme, global_steps)
writer_dict['valid_global_steps'] = global_steps + 1

return nme, predictions


def inference(config, data_loader, model):
batch_time = AverageMeter()
data_time = AverageMeter()
losses = AverageMeter()

num_classes = config.MODEL.NUM_JOINTS
predictions = torch.zeros((len(data_loader.dataset), num_classes, 2))

model.eval()

nme_count = 0
nme_batch_sum = 0
count_failure_008 = 0
count_failure_010 = 0
end = time.time()

with torch.no_grad():
for i, (inp, target, meta) in enumerate(data_loader):
data_time.update(time.time() - end)
output = model(inp)
score_map = output.data.cpu()
preds = decode_preds(score_map, meta['center'], meta['scale'], [128, 128])

# NME
nme_temp = compute_nme(preds, meta)

failure_008 = (nme_temp > 0.08).sum()
failure_010 = (nme_temp > 0.10).sum()
count_failure_008 += failure_008
count_failure_010 += failure_010

nme_batch_sum += np.sum(nme_temp)
nme_count = nme_count + preds.size(0)
for n in range(score_map.size(0)):
predictions[meta['index'][n], :, :] = preds[n, :, :]

# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()

nme = nme_batch_sum / nme_count
failure_008_rate = count_failure_008 / nme_count
failure_010_rate = count_failure_010 / nme_count

msg = 'Test Results time:{:.4f} loss:{:.4f} nme:{:.4f} [008]:{:.4f} ' \
'[010]:{:.4f}'.format(batch_time.avg, losses.avg, nme,
failure_008_rate, failure_010_rate)
logger.info(msg)

return nme, predictions
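For reference, a sketch of how train() and validate() above are usually wired together. The loss choice and the DataParallel wrapping follow the common HRNet setup, but the exact wiring here is an assumption, not taken verbatim from the handover code:

import torch
import torch.nn as nn

# `config`, `get_face_alignment_net` and `Face300W` come from the other
# files in this document; everything else is assumed wiring.
model = nn.DataParallel(get_face_alignment_net(config)).cuda()
criterion = nn.MSELoss().cuda()  # heatmaps are regressed with an MSE loss
optimizer = torch.optim.Adam(model.parameters(), lr=config.TRAIN.LR)

train_loader = torch.utils.data.DataLoader(
    Face300W(config, is_train=True),
    batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
    shuffle=config.TRAIN.SHUFFLE,
    num_workers=config.WORKERS,
    pin_memory=config.PIN_MEMORY)

for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
    train(config, train_loader, model, criterion, optimizer, epoch,
          writer_dict=None)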
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Created by Tianheng Cheng(tianhengcheng@gmail.com), Yang Zhao
# ------------------------------------------------------------------------------
#郭金玉
import os
import random

import torch
import torch.utils.data as data
import pandas as pd
from PIL import Image
import numpy as np

from ..utils.transforms import fliplr_joints, generate_target, transform_pixel


class Face300W(data.Dataset):

def __init__(self, cfg, is_train=True, transform=None):
# specify annotation file for dataset
if is_train:
self.csv_file = cfg.DATASET.TRAINSET
else:
self.csv_file = cfg.DATASET.TESTSET

self.is_train = is_train
self.transform = transform
self.data_root = cfg.DATASET.ROOT
self.input_size = cfg.MODEL.IMAGE_SIZE
self.output_size = cfg.MODEL.HEATMAP_SIZE
self.sigma = cfg.MODEL.SIGMA
self.scale_factor = cfg.DATASET.SCALE_FACTOR
self.rot_factor = cfg.DATASET.ROT_FACTOR
self.label_type = cfg.MODEL.TARGET_TYPE
self.flip = cfg.DATASET.FLIP

# load annotations
self.landmarks_frame = pd.read_csv(self.csv_file)

self.mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
self.std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

def __len__(self):
return len(self.landmarks_frame)

def __getitem__(self, idx):

image_path = os.path.join(self.data_root,
self.landmarks_frame.iloc[idx, 0])
scale = self.landmarks_frame.iloc[idx, 1]

center_w = self.landmarks_frame.iloc[idx, 2]
center_h = self.landmarks_frame.iloc[idx, 3]
center = torch.Tensor([center_w, center_h])

pts = self.landmarks_frame.iloc[idx, 4:].values
pts = pts.astype('float').reshape(-1, 2)

scale *= 1.0
nparts = pts.shape[0]
img = np.array(Image.open(image_path).convert('RGB'), dtype=np.float32)

r = 0
if self.is_train:
scale = scale * (random.uniform(1 - self.scale_factor,
1 + self.scale_factor))
r = random.uniform(-self.rot_factor, self.rot_factor) \
if random.random() <= 0.6 else 0
# disabled ('baocuo' = it raised errors): the 300W flip pairs do not
# match the tooth keypoint layout, so horizontal flipping is turned off.
# if random.random() <= 0.5 and self.flip:
# img = np.fliplr(img)
# pts = fliplr_joints(pts, width=img.shape[1], dataset='300W')
# center[0] = img.shape[1] - center[0]



target = np.zeros((nparts, self.output_size[0], self.output_size[1]))
tpts = pts.copy()

for i in range(nparts):
if tpts[i, 1] > 0:
tpts[i, 0:2] = transform_pixel(tpts[i, 0:2]+1, center,
scale, self.output_size, rot=r)
target[i] = generate_target(target[i], tpts[i]-1, self.sigma,
label_type=self.label_type)
img = img.astype(np.float32)
img = (img/255.0 - self.mean) / self.std
img = img.transpose([2, 0, 1])
target = torch.Tensor(target)
tpts = torch.Tensor(tpts)
center = torch.Tensor(center)

meta = {'index': idx, 'center': center, 'scale': scale,
'pts': torch.Tensor(pts), 'tpts': tpts}

return img, target, meta


if __name__ == '__main__':

pass
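From __getitem__ above, each CSV row must contain the image path, a scale, the box center, and then the flattened keypoint coordinates. A sketch (file name and values invented for illustration) of producing such an annotation file:

import pandas as pd

# Column layout expected by the loader above:
#   col 0: image path (relative to cfg.DATASET.ROOT)
#   col 1: scale
#   cols 2, 3: center_w, center_h
#   cols 4..: x1, y1, x2, y2, ... for every keypoint
rows = [['images/tooth_0001.png', 1.2, 256.0, 256.0,
         120.5, 200.3, 140.2, 198.7]]
cols = ['image_name', 'scale', 'center_w', 'center_h',
        'p1_x', 'p1_y', 'p2_x', 'p2_y']
pd.DataFrame(rows, columns=cols).to_csv('tooth_train.csv', index=False)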
def __init__(self, config, **kwargs):
self.inplanes = 64
extra = config.MODEL.EXTRA
super(HighResolutionNet, self).__init__()

# stem net
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1,
bias=False)

#self.conv1 = nn.Conv2d(3, 128, kernel_size=3, stride=2, padding=1,
#                       bias=False)

self.bn1 = BatchNorm2d(64, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1,
bias=False)
self.bn2 = BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.sf = nn.Softmax(dim=1)
self.layer1 = self._make_layer(Bottleneck, 64, 64, 4)

self.stage2_cfg = extra['STAGE2']
num_channels = self.stage2_cfg['NUM_CHANNELS']
block = blocks_dict[self.stage2_cfg['BLOCK']]
num_channels = [
num_channels[i] * block.expansion for i in range(len(num_channels))]

self.transition1 = self._make_transition_layer(
[256], num_channels)
#self.transition1 = self._make_transition_layer(
#    [512], num_channels)

self.stage2, pre_stage_channels = self._make_stage(
self.stage2_cfg, num_channels)
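The commented-out lines above appear to record the variant that was tried: widening the stem to 128 channels (matching STEM_INPLANES = 128 in the configuration earlier in this document), which would make layer1 output 512 channels and therefore change transition1's input channel list from [256] to [512].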
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Created by Tianheng Cheng(tianhengcheng@gmail.com), Yang Zhao
# ------------------------------------------------------------------------------
#郭金玉
import cv2
import torch
import scipy
import scipy.misc
import numpy as np
# import os
# os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import PIL

MATCHED_PARTS = {
"300W": ([1, 17], [2, 16], [3, 15], [4, 14], [5, 13], [6, 12], [7, 11], [8, 10],
[18, 27], [19, 26], [20, 25], [21, 24], [22, 23],
[32, 36], [33, 35],
[37, 46], [38, 45], [39, 44], [40, 43], [41, 48], [42, 47],
[49, 55], [50, 54], [51, 53], [62, 64], [61, 65], [68, 66], [59, 57], [60, 56]),
"AFLW": ([1, 6], [2, 5], [3, 4],
[7, 12], [8, 11], [9, 10],
[13, 15],
[16, 18]),
"COFW": ([1, 2], [5, 7], [3, 4], [6, 8], [9, 10], [11, 12], [13, 15], [17, 18], [14, 16], [19, 20], [23, 24]),
"WFLW": ([0, 32], [1, 31], [2, 30], [3, 29], [4, 28], [5, 27], [6, 26], [7, 25], [8, 24], [9, 23], [10, 22],
[11, 21], [12, 20], [13, 19], [14, 18], [15, 17], # check
[33, 46], [34, 45], [35, 44], [36, 43], [37, 42], [38, 50], [39, 49], [40, 48], [41, 47], # eyebrow
[60, 72], [61, 71], [62, 70], [63, 69], [64, 68], [65, 75], [66, 74], [67, 73],
[55, 59], [56, 58],
[76, 82], [77, 81], [78, 80], [87, 83], [86, 84],
[88, 92], [89, 91], [95, 93], [96, 97])}


def fliplr_joints(x, width, dataset='aflw'):
"""
flip coords
"""
matched_parts = MATCHED_PARTS[dataset]
# Flip horizontal
x[:, 0] = width - x[:, 0]

if dataset == 'WFLW':
for pair in matched_parts:
tmp = x[pair[0], :].copy()
x[pair[0], :] = x[pair[1], :]
x[pair[1], :] = tmp
else:
for pair in matched_parts:
tmp = x[pair[0] - 1, :].copy()
x[pair[0] - 1, :] = x[pair[1] - 1, :]
x[pair[1] - 1, :] = tmp
return x


def get_3rd_point(a, b):
direct = a - b
return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_dir(src_point, rot_rad):
sn, cs = np.sin(rot_rad), np.cos(rot_rad)

src_result = [0, 0]
src_result[0] = src_point[0] * cs - src_point[1] * sn
src_result[1] = src_point[0] * sn + src_point[1] * cs

return src_result


def get_affine_transform(
center, scale, rot, output_size,
shift=np.array([0, 0], dtype=np.float32), inv=0):
if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
print(scale)
scale = np.array([scale, scale])

scale_tmp = scale * 200.0
src_w = scale_tmp[0]
dst_w = output_size[0]
dst_h = output_size[1]

rot_rad = np.pi * rot / 180
src_dir = get_dir([0, src_w * -0.5], rot_rad)
dst_dir = np.array([0, dst_w * -0.5], np.float32)

src = np.zeros((3, 2), dtype=np.float32)
dst = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center + scale_tmp * shift
src[1, :] = center + src_dir + scale_tmp * shift
dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir

src[2:, :] = get_3rd_point(src[0, :], src[1, :])
dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

if inv:
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

return trans


def crop_v2(img, center, scale, output_size, rot=0):
trans = get_affine_transform(center, scale, rot, output_size)

dst_img = cv2.warpAffine(
img, trans, (int(output_size[0]), int(output_size[1])),
flags=cv2.INTER_LINEAR
)

return dst_img


def get_transform(center, scale, output_size, rot=0):
"""
General image processing functions
"""
# Generate transformation matrix
h = 200 * scale
t = np.zeros((3, 3))
t[0, 0] = float(output_size[1]) / h
t[1, 1] = float(output_size[0]) / h
t[0, 2] = output_size[1] * (-float(center[0]) / h + .5)
t[1, 2] = output_size[0] * (-float(center[1]) / h + .5)
t[2, 2] = 1
if not rot == 0:
rot = -rot # To match direction of rotation from cropping
rot_mat = np.zeros((3, 3))
rot_rad = rot * np.pi / 180
sn, cs = np.sin(rot_rad), np.cos(rot_rad)
rot_mat[0, :2] = [cs, -sn]
rot_mat[1, :2] = [sn, cs]
rot_mat[2, 2] = 1
# Need to rotate around center
t_mat = np.eye(3)
t_mat[0, 2] = -output_size[1] / 2
t_mat[1, 2] = -output_size[0] / 2
t_inv = t_mat.copy()
t_inv[:2, 2] *= -1
t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
return t


def transform_pixel(pt, center, scale, output_size, invert=0, rot=0):
# Transform pixel location to different reference
t = get_transform(center, scale, output_size, rot=rot)
if invert:
t = np.linalg.inv(t)
new_pt = np.array([pt[0] - 1, pt[1] - 1, 1.]).T
new_pt = np.dot(t, new_pt)
return new_pt[:2].astype(int) + 1


def transform_preds(coords, center, scale, output_size):
for p in range(coords.size(0)):
coords[p, 0:2] = torch.tensor(transform_pixel(coords[p, 0:2], center, scale, output_size, 1, 0))
return coords


def crop(img, center, scale, output_size, rot=0):
center_new = center.clone()

# Preprocessing for efficient cropping
ht, wd = img.shape[0], img.shape[1]
sf = scale * 200.0 / output_size[0]
if sf < 2:
sf = 1
else:
new_size = int(np.math.floor(max(ht, wd) / sf))
new_ht = int(np.math.floor(ht / sf))
new_wd = int(np.math.floor(wd / sf))
if new_size < 2:
return torch.zeros(output_size[0], output_size[1], img.shape[2]) \
if len(img.shape) > 2 else torch.zeros(output_size[0], output_size[1])
else:
# img = scipy.misc.imresize(img, [new_ht, new_wd]) # (0-1)-->(0-255) #old
img = np.array(PIL.Image.fromarray(np.uint8(img)).resize((new_ht, new_wd)))
center_new[0] = center_new[0] * 1.0 / sf
center_new[1] = center_new[1] * 1.0 / sf
scale = scale / sf

# Upper left point
ul = np.array(transform_pixel([0, 0], center_new, scale, output_size, invert=1))
# Bottom right point
br = np.array(transform_pixel(output_size, center_new, scale, output_size, invert=1))

# Padding so that when rotated proper amount of context is included
pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
if not rot == 0:
ul -= pad
br += pad

new_shape = [br[1] - ul[1], br[0] - ul[0]]
if len(img.shape) > 2:
new_shape += [img.shape[2]]

new_img = np.zeros(new_shape, dtype=np.float32)

# Range to fill new array
new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0]
new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1]
# Range to sample from original image
old_x = max(0, ul[0]), min(len(img[0]), br[0])
old_y = max(0, ul[1]), min(len(img), br[1])
new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1], old_x[0]:old_x[1]]

if not rot == 0:
# Remove padding
from skimage import transform
#********
# new_img = scipy.misc.imrotate(new_img, rot) # old
new_img = transform.rotate(new_img, rot)
# *********
# new_img = scipy.ndimage.interpolation.rotate(new_img, rot) # new

new_img = new_img[pad:-pad, pad:-pad]
# new_img = scipy.misc.imresize(new_img, output_size) # old scipy=1.2.1 Pillow=6.0.0
# print("*************************output_size:", tuple(output_size), type(new_img))
# ********
"https://www.cnblogs.com/Timeouting-Study/p/12356833.html"
new_img = np.array(PIL.Image.fromarray(np.uint8(new_img)).resize(tuple(output_size)))
# new_img = np.array(PIL.Image.fromarray(new_img).resize((output_size[0], output_size[1])))
return new_img


def generate_target(img, pt, sigma, label_type='Gaussian'):
# Check that any part of the gaussian is in-bounds
tmp_size = sigma * 3
ul = [int(pt[0] - tmp_size), int(pt[1] - tmp_size)]
br = [int(pt[0] + tmp_size + 1), int(pt[1] + tmp_size + 1)]
if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
br[0] < 0 or br[1] < 0):
# If not, just return the image as is
return img

# Generate gaussian
size = 2 * tmp_size + 1
x = np.arange(0, size, 1, np.float32)
y = x[:, np.newaxis]
x0 = y0 = size // 2
# The gaussian is not normalized, we want the center value to equal 1
if label_type == 'Gaussian':
g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
else:
g = sigma / (((x - x0) ** 2 + (y - y0) ** 2 + sigma ** 2) ** 1.5)

# Usable gaussian range
g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
# Image range
img_x = max(0, ul[0]), min(br[0], img.shape[1])
img_y = max(0, ul[1]), min(br[1], img.shape[0])

img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
return img
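A quick check of generate_target (a sketch using the heatmap size and SIGMA from the configuration above): it writes a 3-sigma Gaussian blob, peaking at 1.0, centred on the keypoint:

import numpy as np

hm = np.zeros((128, 128), dtype=np.float32)
hm = generate_target(hm, pt=[64, 32], sigma=1.0)  # pt is (x, y)
print(hm.max())                                   # 1.0
print(np.unravel_index(hm.argmax(), hm.shape))    # (32, 64): row y, col x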

Newly Added Code

gen-data.py, csv_rw.py, data-check.py
