因为之前正好看了CMU在CVPR 2017上的论文《Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields》,而且他们提供了训练好的模型,所以就直接用CMU训练的模型在AI challenge的数据集上做了测试。这个没有使用AI challenge训练集训练过的模型,最终在AI challenge上的得分是0.1667,可以看作是一个baseline。
以下是预处理的说明以及加入预处理程序的源代码。openpose的源代码使用#openpose ##openpose标注出来了,剩下的就是AI challenge的预处理程序。
在Google Cloud上使用1片NVIDIA Tesla K80跑完AI challenge的测试集大约需要24小时,处理一幅图大约需要4秒。
AI challenge测试要求的关键点顺序是:1右肩,2右肘,3右腕,4左肩,5左肘,6左腕,7右髋,8右膝,9右踝,10左髋,11左膝,12左踝,13头顶,14脖子
openpose源码中subset输出的关键点顺序是:1鼻子,2脖子,3右肩,4右肘,5右腕,6左肩,7左肘,8左腕,9右髋,10右膝,11右踝,12左髋,13左膝,14左踝,15左眼,16右眼,17左耳,18右耳,19 pt19
函数 subset2AIsubset, all_peaks2all_peaks_1d, listMultiKeypoints 负责把openpose的关键点转换成AI challenge 的关键点。
当然还得按照官网上的要求输出特定格式的JSON文件,如下所示:
[
{
"image_id": "a0f6bdc065a602b7b84a67fb8d14ce403d902e0d",
"keypoint_annotations": {
"human1": [261, 294, 1, 281, 328, 1, 0, 0, 0, 213, 295, 1, 208, 346, 1, 192, 335, 1, 245, 375, 1, 255, 432, 1, 244, 494, 1, 221, 379, 1, 219, 442, 1, 226, 491, 1, 226, 256, 1, 231, 284, 1],
"human2": [313, 301, 1, 305, 337, 1, 321, 345, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 313, 359, 1, 320, 409, 1, 311, 454, 1, 0, 0, 0, 330, 409, 1, 324, 446, 1, 337, 284, 1, 327, 302, 1],
"human3": [373, 304, 1, 346, 286, 1, 332, 263, 1, 0, 0, 0, 0, 0, 0, 345, 313, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 363, 386, 1, 361, 424, 1, 361, 475, 1, 365, 273, 1, 369, 297, 1],
...
}
}
...
]
1 #import numpy as np
2 import json
3 import os
4 #openpose
5 import keras
6 from keras.models import Sequential
7 from keras.models import Model
8 from keras.layers import Input, Dense, Activation
9 from keras.layers.convolutional import Conv2D
10 from keras.layers.pooling import MaxPooling2D
11 from keras.layers.normalization import BatchNormalization
12 from keras.layers.merge import Concatenate
13 from config_reader import config_reader
14 import scipy
15
16 import cv2
17 import numpy as np
18 np.seterr(divide='ignore', invalid='ignore')
19 import util
20 import math
21 from numpy import ma
22 from scipy.ndimage.filters import gaussian_filter
23 ##openpose
24 #openpose
def relu(x):
    """Pass tensor ``x`` through a ReLU ``Activation`` layer and return the result."""
    activation_layer = Activation('relu')
    return activation_layer(x)
27
def conv(x, nf, ks, name):
    """Apply a 'same'-padded ``ks`` x ``ks`` convolution with ``nf`` filters to ``x``.

    ``name`` becomes the Keras layer name.
    """
    return Conv2D(nf, (ks, ks), padding='same', name=name)(x)
31
def pooling(x, ks, st, name):
    """Apply a ``ks`` x ``ks`` max-pooling layer with stride ``st`` to ``x``.

    ``name`` becomes the Keras layer name.
    """
    pool_layer = MaxPooling2D((ks, ks), strides=(st, st), name=name)
    return pool_layer(x)
35
def vgg_block(x):
    """Build the shared feature extractor on top of tensor ``x``.

    The stack is ten VGG-style 3x3 convolutions (each followed by ReLU) with
    three 2x2/stride-2 max-pools in between, then two extra non-VGG "CPM"
    convolutions. Returns the resulting 128-channel feature tensor.
    """
    # ("conv", filters, layer_name): 3x3 convolution followed by ReLU.
    # ("pool", layer_name): 2x2 max-pooling with stride 2.
    layer_plan = [
        # Block 1
        ("conv", 64, "conv1_1"),
        ("conv", 64, "conv1_2"),
        ("pool", "pool1_1"),
        # Block 2
        ("conv", 128, "conv2_1"),
        ("conv", 128, "conv2_2"),
        ("pool", "pool2_1"),
        # Block 3
        ("conv", 256, "conv3_1"),
        ("conv", 256, "conv3_2"),
        ("conv", 256, "conv3_3"),
        ("conv", 256, "conv3_4"),
        ("pool", "pool3_1"),
        # Block 4
        ("conv", 512, "conv4_1"),
        ("conv", 512, "conv4_2"),
        # Additional non-VGG layers
        ("conv", 256, "conv4_3_CPM"),
        ("conv", 128, "conv4_4_CPM"),
    ]

    for spec in layer_plan:
        if spec[0] == "conv":
            _, filters, layer_name = spec
            x = relu(conv(x, filters, 3, layer_name))
        else:
            x = pooling(x, 2, 2, spec[1])

    return x
76
def stage1_block(x, num_p, branch):
    """Build one branch of the first prediction stage.

    Args:
        x: input feature tensor.
        num_p: number of output channels of the final 1x1 convolution.
        branch: branch number, used only in the layer names (``..._L<branch>``).

    Returns:
        The output tensor of the last convolution (no activation applied).
    """
    # (filters, kernel size) of the four hidden convolutions, each followed by ReLU.
    hidden_layers = ((128, 3), (128, 3), (128, 3), (512, 1))
    for position, (filters, kernel) in enumerate(hidden_layers, 1):
        layer_name = "conv5_%d_CPM_L%d" % (position, branch)
        x = relu(conv(x, filters, kernel, layer_name))

    return conv(x, num_p, 1, "conv5_5_CPM_L%d" % branch)
91
def stageT_block(x, num_p, stage, branch):
    """Build one branch of a refinement stage (stage >= 2).

    Args:
        x: input tensor for this stage.
        num_p: number of output channels of the final 1x1 convolution.
        stage: stage number, used only in the layer names.
        branch: branch number, used only in the layer names.

    Returns:
        The output tensor of the last convolution (no activation applied).
    """
    suffix = "_stage%d_L%d" % (stage, branch)

    # Five 7x7 convolutions, each followed by ReLU.
    for position in range(1, 6):
        x = relu(conv(x, 128, 7, "Mconv%d%s" % (position, suffix)))

    # Two 1x1 convolutions; only the first one is followed by ReLU.
    x = relu(conv(x, 128, 1, "Mconv6" + suffix))
    return conv(x, num_p, 1, "Mconv7" + suffix)
110 ##openpose
111
def subset2AIsubset(t, numPersons):
    """Reorder openpose subset rows into the AI Challenger key-point order.

    For each of the first ``numPersons`` rows of ``t`` the result holds 14
    entries: columns 2..13 of the row (right shoulder ... left ankle in the
    openpose ordering), followed by column 0 (nose, used as "head top") and
    column 1 (neck).

    Args:
        t: sequence of rows (lists or a 2-D array) with at least 14 columns.
        numPersons: number of leading rows of ``t`` to convert.

    Returns:
        A list of ``numPersons`` lists with 14 entries each.
    """
    # range() instead of xrange() so the function also runs on Python 3.
    return [
        list(t[person][2:14]) + [t[person][0], t[person][1]]
        for person in range(numPersons)
    ]
126
def all_peaks2all_peaks_1d(all_peaks):
    """Flatten the per-part peak lists into one list, preserving order."""
    return [peak for part_peaks in all_peaks for peak in part_peaks]
133
def listMultiKeypoints(all_peaks_1d, numPersons, ai_subset=None):
    """Build the flat AI Challenger key-point list for every person.

    Each person contributes 14 key points, each written as three numbers
    ``x, y, v``: a peak id of -1 yields ``0, 0, 0``; otherwise the x/y of the
    referenced peak followed by ``1``.

    Args:
        all_peaks_1d: flat list of peaks; ``all_peaks_1d[i][0]`` and
            ``all_peaks_1d[i][1]`` are the x and y of peak ``i``.
        numPersons: number of persons (rows of the subset) to convert.
        ai_subset: per-person lists of 14 peak ids. When omitted, the
            module-level ``AIsubset`` is used, as callers historically relied on.

    Returns:
        A list with one 42-element list of built-in ints per person.
    """
    if ai_subset is None:
        # Backward-compatible fallback to the global set by the driver loop.
        ai_subset = AIsubset

    multi_keypoints = []
    # range() instead of xrange() so the function also runs on Python 3.
    for person in range(numPersons):
        sp_keypoints = []
        for joint in range(14):
            peak_id = ai_subset[person][joint]
            if peak_id == -1.:
                sp_keypoints.extend((0, 0, 0))
            else:
                peak = all_peaks_1d[int(peak_id)]
                # Coerce to built-in int: NumPy integer scalars are not
                # serialisable by json.dump on Python 3.
                sp_keypoints.extend((int(peak[0]), int(peak[1]), 1))
        multi_keypoints.append(sp_keypoints)
    return multi_keypoints
150
def nPersons(t):
    """Return the number of rows in ``t`` (one row per detected person)."""
    person_count = len(t)
    return person_count
153
def listHuman(nPersons):
    """Return the JSON keys ``['human1', ..., 'human<nPersons>']``.

    Args:
        nPersons: number of persons to generate keys for.

    Returns:
        A list of ``nPersons`` strings; empty when ``nPersons`` is 0.
    """
    # Use the argument itself (not a module-level global) and range() so the
    # function is self-contained and also runs on Python 3.
    return ['human' + str(index + 1) for index in range(nPersons)]
159
160
161
162 #openpose
weights_path = "model/keras/model.h5"

# Height and width are left unspecified; only the 3 colour channels are fixed.
input_shape = (None, None, 3)
img_input = Input(shape=input_shape)

stages = 6
np_branch1 = 38  # output channels of branch 1
np_branch2 = 19  # output channels of branch 2

# Shared feature extractor.
stage0_out = vgg_block(img_input)

# Stage 1: both branches read the shared features.
stage1_branch1_out = stage1_block(stage0_out, np_branch1, 1)
stage1_branch2_out = stage1_block(stage0_out, np_branch2, 2)
x = Concatenate()([stage1_branch1_out, stage1_branch2_out, stage0_out])

# Stages 2..stages: each stage consumes the previous stage's two outputs
# concatenated with the shared features.
for sn in range(2, stages + 1):
    stageT_branch1_out = stageT_block(x, np_branch1, sn, 1)
    stageT_branch2_out = stageT_block(x, np_branch2, sn, 2)
    if sn == stages:
        # The last stage's outputs are the model outputs; nothing to feed forward.
        break
    x = Concatenate()([stageT_branch1_out, stageT_branch2_out, stage0_out])

# The model returns [branch 1 output, branch 2 output] of the final stage.
model = Model(img_input, [stageT_branch1_out, stageT_branch2_out])
model.load_weights(weights_path)
190 ##openpose
191
192 #openpose
# Limbs as pairs of 1-based part indices, listed in the order in which
# connections are searched (note kept from upstream: "center 29 is in the
# position 15").
limbSeq = [
    [2, 3], [2, 6], [3, 4], [4, 5],
    [6, 7], [7, 8], [2, 9], [9, 10],
    [10, 11], [2, 12], [12, 13], [13, 14],
    [2, 1], [1, 15], [15, 17], [1, 16],
    [16, 18], [3, 17], [6, 18],
]

# For the limb at the same position in limbSeq: the pair of channel numbers
# of its part-affinity field. The code subtracts 19 from these before
# indexing the PAF array.
mapIdx = [
    [31, 32], [39, 40], [33, 34], [35, 36],
    [41, 42], [43, 44], [19, 20], [21, 22],
    [23, 24], [25, 26], [27, 28], [29, 30],
    [47, 48], [49, 50], [53, 54], [51, 52],
    [55, 56], [37, 38], [45, 46],
]
201 ##openpose
202
#openpose
def _average_network_outputs(oriImg, param, model_params):
    """Run the network at every search scale and average the heatmaps and PAFs.

    Returns ``(heatmap_avg, paf_avg)``, both resized to the height and width
    of ``oriImg`` with 19 and 38 channels respectively.
    """
    multiplier = [x * model_params['boxsize'] / oriImg.shape[0] for x in param['scale_search']]
    heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
    paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

    for scale in multiplier:
        imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model_params['stride'], model_params['padValue'])

        # required shape (1, width, height, channels)
        input_img = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 0, 1, 2)) / 256 - 0.5
        print("Input shape: " + str(input_img.shape))

        output_blobs = model.predict(input_img)
        print("Output shape (heatmap): " + str(output_blobs[1].shape))

        # extract outputs, resize, and remove padding
        heatmap = np.squeeze(output_blobs[1])  # output 1 is heatmaps
        heatmap = cv2.resize(heatmap, (0, 0), fx=model_params['stride'], fy=model_params['stride'], interpolation=cv2.INTER_CUBIC)
        heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
        heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

        paf = np.squeeze(output_blobs[0])  # output 0 is PAFs
        paf = cv2.resize(paf, (0, 0), fx=model_params['stride'], fy=model_params['stride'], interpolation=cv2.INTER_CUBIC)
        paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
        paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

        heatmap_avg = heatmap_avg + heatmap / len(multiplier)
        paf_avg = paf_avg + paf / len(multiplier)

    return heatmap_avg, paf_avg


def _find_peaks(heatmap_avg, thre1):
    """Find local maxima above ``thre1`` in the first 18 heatmap channels.

    Returns one list per channel; each peak is ``(x, y, score, peak_id)``
    where ``peak_id`` counts peaks consecutively across all channels.
    """
    all_peaks = []
    peak_counter = 0

    for part in range(19 - 1):
        map_ori = heatmap_avg[:, :, part]
        smoothed = gaussian_filter(map_ori, sigma=3)

        # Copies of the smoothed map shifted by one pixel in each direction,
        # used to compare every pixel with its four neighbours.
        map_left = np.zeros(smoothed.shape)
        map_left[1:, :] = smoothed[:-1, :]
        map_right = np.zeros(smoothed.shape)
        map_right[:-1, :] = smoothed[1:, :]
        map_up = np.zeros(smoothed.shape)
        map_up[:, 1:] = smoothed[:, :-1]
        map_down = np.zeros(smoothed.shape)
        map_down[:, :-1] = smoothed[:, 1:]

        peaks_binary = np.logical_and.reduce(
            (smoothed >= map_left, smoothed >= map_right, smoothed >= map_up, smoothed >= map_down, smoothed > thre1))
        peak_rows, peak_cols = np.nonzero(peaks_binary)
        peaks = list(zip(peak_cols, peak_rows))  # note reverse: (x, y)
        # The score is read from the unsmoothed map.
        peaks_with_score_and_id = [
            (x, y, map_ori[y, x], peak_counter + offset)
            for offset, (x, y) in enumerate(peaks)
        ]

        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)

    return all_peaks


def _find_connections(all_peaks, paf_avg, image_height, thre2, mid_num=10):
    """Score candidate limbs against the PAFs and keep the best per limb type.

    Returns ``(connection_all, special_k)``: ``connection_all[k]`` is an
    (n, 5) array of ``[peak_id_a, peak_id_b, score, index_a, index_b]`` rows
    for limb ``k`` (or ``[]`` when either end has no peaks), and
    ``special_k`` lists the limb indices that had no candidates.
    """
    connection_all = []
    special_k = []

    for k in range(len(mapIdx)):
        score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
        candA = all_peaks[limbSeq[k][0] - 1]
        candB = all_peaks[limbSeq[k][1] - 1]
        nA = len(candA)
        nB = len(candB)
        if nA == 0 or nB == 0:
            special_k.append(k)
            connection_all.append([])
            continue

        connection_candidate = []
        for i in range(nA):
            for j in range(nB):
                vec = np.subtract(candB[j][:2], candA[i][:2])
                # The +0.1 keeps the norm non-zero when both peaks coincide.
                norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1] + 0.1)
                vec = np.divide(vec, norm)

                # mid_num sample points on the segment from peak A to peak B.
                startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
                                    np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                vec_x = np.array([score_mid[int(round(py)), int(round(px)), 0] for px, py in startend])
                vec_y = np.array([score_mid[int(round(py)), int(round(px)), 1] for px, py in startend])

                score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(0.5 * image_height / norm - 1, 0)

                criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
                criterion2 = score_with_dist_prior > 0
                if criterion1 and criterion2:
                    connection_candidate.append(
                        [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])

        # Greedy assignment: best score first, each peak used at most once.
        connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
        connection = np.zeros((0, 5))
        for cand in connection_candidate:
            i, j, s = cand[0:3]
            if i not in connection[:, 3] and j not in connection[:, 4]:
                connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                if len(connection) >= min(nA, nB):
                    break

        connection_all.append(connection)

    return connection_all, special_k


def _assemble_subset(all_peaks, connection_all, special_k):
    """Group connected peaks into persons.

    Returns an (n, 20) array: columns 0..17 hold the peak id of each part
    (-1 when missing); the last number in each row is the total parts number
    of that person and the second last is the score of the overall
    configuration. Rows with fewer than 4 parts or a mean score below 0.4
    are dropped.
    """
    subset = -1 * np.ones((0, 20))
    candidate = np.array([item for sublist in all_peaks for item in sublist])

    for k in range(len(mapIdx)):
        if k in special_k:
            continue
        partAs = connection_all[k][:, 0]
        partBs = connection_all[k][:, 1]
        indexA, indexB = np.array(limbSeq[k]) - 1

        for i in range(len(connection_all[k])):
            found = 0
            subset_idx = [-1, -1]
            for j in range(len(subset)):
                if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                    subset_idx[found] = j
                    found += 1

            if found == 1:
                j = subset_idx[0]
                if subset[j][indexB] != partBs[i]:
                    subset[j][indexB] = partBs[i]
                    subset[j][-1] += 1
                    subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
            elif found == 2:  # if found 2 and disjoint, merge them
                j1, j2 = subset_idx
                print ("found = 2")
                membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                    subset[j1][:-2] += (subset[j2][:-2] + 1)
                    subset[j1][-2:] += subset[j2][-2:]
                    subset[j1][-2] += connection_all[k][i][2]
                    subset = np.delete(subset, j2, 0)
                else:  # as like found == 1
                    subset[j1][indexB] = partBs[i]
                    subset[j1][-1] += 1
                    subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

            # if find no partA in the subset, create a new subset
            elif not found and k < 17:
                row = -1 * np.ones(20)
                row[indexA] = partAs[i]
                row[indexB] = partBs[i]
                row[-1] = 2
                row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                subset = np.vstack([subset, row])

    # delete some rows of subset which has few parts occur
    deleteIdx = []
    for i in range(len(subset)):
        if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
            deleteIdx.append(i)
    return np.delete(subset, deleteIdx, axis=0)
##openpose


path = "./test0"
files = os.listdir(path)
list_image_names = []
final_results = []
# Progress is reported against the real number of directory entries.
total_images = len(files)
# The configuration does not change between images, so read it only once.
param, model_params = config_reader()

for num_processed_images, file_name in enumerate(files, 1):
    print('file:', file_name)
    print('number of image:', num_processed_images)
    print('%.2f%%' % (100.0 * num_processed_images / total_images))
    # splitext handles extensions of any length (.jpg, .jpeg, ...).
    image_id = os.path.splitext(str(file_name))[0]

    #openpose
    test_image = os.path.join(path, file_name)
    oriImg = cv2.imread(test_image)
    ##openpose
    if oriImg is None:
        # cv2.imread returns None for files it cannot decode; skip them
        # instead of aborting the whole (long-running) job.
        print('skipping unreadable image:', test_image)
        continue
    list_image_names.append(image_id)

    #openpose
    heatmap_avg, paf_avg = _average_network_outputs(oriImg, param, model_params)
    all_peaks = _find_peaks(heatmap_avg, param['thre1'])
    connection_all, special_k = _find_connections(all_peaks, paf_avg, oriImg.shape[0], param['thre2'])
    subset = _assemble_subset(all_peaks, connection_all, special_k)
    ##openpose

    # numPersons and AIsubset stay module-level names because helper
    # functions in this file may read them as globals.
    numPersons = nPersons(subset)
    AIsubset = subset2AIsubset(subset, numPersons)
    # all_peaks -> all_peaks_1d
    all_peaks_1d = all_peaks2all_peaks_1d(all_peaks)

    keypoint_annotations = dict(zip(listHuman(numPersons), listMultiKeypoints(all_peaks_1d, numPersons)))
    final_results.append({'image_id': image_id, 'keypoint_annotations': keypoint_annotations})

print(final_results)
with open('data.json', 'w') as f:
    json.dump(final_results, f)

print(list_image_names)
[
{
"image_id": "a0f6bdc065a602b7b84a67fb8d14ce403d902e0d",
"keypoint_annotations": {
"human1": [261, 294, 1, 281, 328, 1, 0, 0, 0, 213, 295, 1, 208, 346, 1, 192, 335, 1, 245, 375, 1, 255, 432, 1, 244, 494, 1, 221, 379, 1, 219, 442, 1, 226, 491, 1, 226, 256, 1, 231, 284, 1],
"human2": [313, 301, 1, 305, 337, 1, 321, 345, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 313, 359, 1, 320, 409, 1, 311, 454, 1, 0, 0, 0, 330, 409, 1, 324, 446, 1, 337, 284, 1, 327, 302, 1],
"human3": [373, 304, 1, 346, 286, 1, 332, 263, 1, 0, 0, 0, 0, 0, 0, 345, 313, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 363, 386, 1, 361, 424, 1, 361, 475, 1, 365, 273, 1, 369, 297, 1],
...
}
}
...
]