Performance of the OpenPose model on the AI Challenger human skeleton keypoint detection task

  • March 12, 2020
  • Notes

I had just read CMU's CVPR 2017 paper "Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields", and the authors provide pretrained models, so I tested CMU's model directly on the AI Challenger dataset. Without any training on the AI Challenger training set, the model scores 0.1667, which can be taken as a baseline.

Below are notes on the preprocessing, followed by the source code with the preprocessing added. The OpenPose portions are delimited with #openpose ... ##openpose comment markers; everything else is the AI Challenger preprocessing code.

On Google Cloud, a single NVIDIA Tesla K80 takes about 24 hours to run the full AI Challenger test set, roughly 4 seconds per image.

The keypoint order required by the AI Challenger evaluation is: 1 right shoulder, 2 right elbow, 3 right wrist, 4 left shoulder, 5 left elbow, 6 left wrist, 7 right hip, 8 right knee, 9 right ankle, 10 left hip, 11 left knee, 12 left ankle, 13 top of head, 14 neck.

The keypoint order of the subset output in the OpenPose source is: 1 nose, 2 neck, 3 right shoulder, 4 right elbow, 5 right wrist, 6 left shoulder, 7 left elbow, 8 left wrist, 9 right hip, 10 right knee, 11 right ankle, 12 left hip, 13 left knee, 14 left ankle, 15 left eye, 16 right eye, 17 left ear, 18 right ear, 19 pt19.

The functions subset2AIsubset, all_peaks2all_peaks_1d, and listMultiKeypoints convert the OpenPose keypoints into AI Challenger keypoints, as sketched below.
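The conversion is essentially an index remapping. Here is a minimal sketch of the mapping that subset2AIsubset implements (the constant name is mine, not from the script); note that OpenPose has no "top of head" point, so the nose is used as a stand-in:

```python
# 0-based indices into one OpenPose subset row, listed in AI Challenger part order:
# AI parts 1-12 (right shoulder ... left ankle) come from OpenPose parts 3-14,
# AI part 13 (top of head) is approximated by OpenPose part 1 (nose),
# AI part 14 (neck) is OpenPose part 2 (neck).
OPENPOSE_TO_AI = list(range(2, 14)) + [0, 1]
```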

Of course, the results also have to be written to a JSON file in the specific format required by the competition site, as shown below:

```json
[
    {
        "image_id": "a0f6bdc065a602b7b84a67fb8d14ce403d902e0d",
        "keypoint_annotations": {
            "human1": [261, 294, 1, 281, 328, 1, 0, 0, 0, 213, 295, 1, 208, 346, 1, 192, 335, 1, 245, 375, 1, 255, 432, 1, 244, 494, 1, 221, 379, 1, 219, 442, 1, 226, 491, 1, 226, 256, 1, 231, 284, 1],
            "human2": [313, 301, 1, 305, 337, 1, 321, 345, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 313, 359, 1, 320, 409, 1, 311, 454, 1, 0, 0, 0, 330, 409, 1, 324, 446, 1, 337, 284, 1, 327, 302, 1],
            "human3": [373, 304, 1, 346, 286, 1, 332, 263, 1, 0, 0, 0, 0, 0, 0, 345, 313, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 363, 386, 1, 361, 424, 1, 361, 475, 1, 365, 273, 1, 369, 297, 1],
            ...
        }
    }
    ...
]
```
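Each "humanN" value is a flat list of 14 (x, y, v) triples in the AI Challenger part order, with v = 1 for a detected point and (0, 0, 0) for a missing one. A minimal, self-contained sketch of emitting one such record (the image ID and coordinates below are placeholders, not real results):

```python
import json

# Hypothetical single-person result: 14 keypoints, each an (x, y, v) triple.
record = {
    "image_id": "0000000000000000000000000000000000000000",  # placeholder ID
    "keypoint_annotations": {
        # only the right shoulder detected; all other parts are (0, 0, 0)
        "human1": [261, 294, 1] + [0, 0, 0] * 13,
    },
}

with open("data.json", "w") as f:
    json.dump([record], f)  # the site expects a JSON array of such records
```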
The complete script:

```python
import json
import os

#openpose
import keras
from keras.models import Sequential
from keras.models import Model
from keras.layers import Input, Dense, Activation
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from config_reader import config_reader
import scipy

import cv2
import numpy as np
np.seterr(divide='ignore', invalid='ignore')
import util
import math
from numpy import ma
from scipy.ndimage.filters import gaussian_filter
##openpose

#openpose
def relu(x):
    return Activation('relu')(x)

def conv(x, nf, ks, name):
    x1 = Conv2D(nf, (ks, ks), padding='same', name=name)(x)
    return x1

def pooling(x, ks, st, name):
    x = MaxPooling2D((ks, ks), strides=(st, st), name=name)(x)
    return x

def vgg_block(x):

    # Block 1
    x = conv(x, 64, 3, "conv1_1")
    x = relu(x)
    x = conv(x, 64, 3, "conv1_2")
    x = relu(x)
    x = pooling(x, 2, 2, "pool1_1")

    # Block 2
    x = conv(x, 128, 3, "conv2_1")
    x = relu(x)
    x = conv(x, 128, 3, "conv2_2")
    x = relu(x)
    x = pooling(x, 2, 2, "pool2_1")

    # Block 3
    x = conv(x, 256, 3, "conv3_1")
    x = relu(x)
    x = conv(x, 256, 3, "conv3_2")
    x = relu(x)
    x = conv(x, 256, 3, "conv3_3")
    x = relu(x)
    x = conv(x, 256, 3, "conv3_4")
    x = relu(x)
    x = pooling(x, 2, 2, "pool3_1")

    # Block 4
    x = conv(x, 512, 3, "conv4_1")
    x = relu(x)
    x = conv(x, 512, 3, "conv4_2")
    x = relu(x)

    # Additional non vgg layers
    x = conv(x, 256, 3, "conv4_3_CPM")
    x = relu(x)
    x = conv(x, 128, 3, "conv4_4_CPM")
    x = relu(x)

    return x

def stage1_block(x, num_p, branch):

    # Block 1
    x = conv(x, 128, 3, "conv5_1_CPM_L%d" % branch)
    x = relu(x)
    x = conv(x, 128, 3, "conv5_2_CPM_L%d" % branch)
    x = relu(x)
    x = conv(x, 128, 3, "conv5_3_CPM_L%d" % branch)
    x = relu(x)
    x = conv(x, 512, 1, "conv5_4_CPM_L%d" % branch)
    x = relu(x)
    x = conv(x, num_p, 1, "conv5_5_CPM_L%d" % branch)

    return x

def stageT_block(x, num_p, stage, branch):

    # Block 1
    x = conv(x, 128, 7, "Mconv1_stage%d_L%d" % (stage, branch))
    x = relu(x)
    x = conv(x, 128, 7, "Mconv2_stage%d_L%d" % (stage, branch))
    x = relu(x)
    x = conv(x, 128, 7, "Mconv3_stage%d_L%d" % (stage, branch))
    x = relu(x)
    x = conv(x, 128, 7, "Mconv4_stage%d_L%d" % (stage, branch))
    x = relu(x)
    x = conv(x, 128, 7, "Mconv5_stage%d_L%d" % (stage, branch))
    x = relu(x)
    x = conv(x, 128, 1, "Mconv6_stage%d_L%d" % (stage, branch))
    x = relu(x)
    x = conv(x, num_p, 1, "Mconv7_stage%d_L%d" % (stage, branch))

    return x
##openpose

def subset2AIsubset(t, numPersons):
    # Reorder each person's OpenPose subset row into AI Challenger part order.
    AIsubset = []
    for j in range(numPersons):
        tempsubset = []
        # AI parts 1-12 (right shoulder ... left ankle) = OpenPose parts 3-14
        for i in range(12):
            tempsubset.append(t[j][i+2])
        # AI part 13 (top of head) ~ OpenPose nose; AI part 14 = OpenPose neck
        tempsubset.append(t[j][0])
        tempsubset.append(t[j][1])
        AIsubset.append(tempsubset)
    return AIsubset

def all_peaks2all_peaks_1d(all_peaks):
    # Flatten the per-part peak lists into one list indexed by global peak id.
    all_peaks_1d = []
    for item in all_peaks:
        for item1 in item:
            all_peaks_1d.append(item1)
    return all_peaks_1d

def listMultiKeypoints(all_peaks_1d, numPersons, AIsubset):
    # Build the flat [x1, y1, v1, ..., x14, y14, v14] list for each person;
    # undetected parts are encoded as (0, 0, 0).
    multi_keypoints = []
    for i in range(numPersons):
        sp_keypoints = []
        for j in range(14):
            if AIsubset[i][j] == -1.:
                sp_keypoints.append(0)
                sp_keypoints.append(0)
                sp_keypoints.append(0)
            else:
                sp_keypoints.append(all_peaks_1d[int(AIsubset[i][j])][0])
                sp_keypoints.append(all_peaks_1d[int(AIsubset[i][j])][1])
                sp_keypoints.append(1)
        multi_keypoints.append(sp_keypoints)
    return multi_keypoints

def nPersons(t):
    return len(t)

def listHuman(numPersons):
    list_human = []
    for i in range(numPersons):
        list_human.append('human' + str(i+1))
    return list_human


#openpose
weights_path = "model/keras/model.h5"

input_shape = (None, None, 3)

img_input = Input(shape=input_shape)

stages = 6
np_branch1 = 38  # PAF channels
np_branch2 = 19  # heatmap channels

# VGG
stage0_out = vgg_block(img_input)

# stage 1
stage1_branch1_out = stage1_block(stage0_out, np_branch1, 1)
stage1_branch2_out = stage1_block(stage0_out, np_branch2, 2)
x = Concatenate()([stage1_branch1_out, stage1_branch2_out, stage0_out])

# stage t >= 2
for sn in range(2, stages + 1):
    stageT_branch1_out = stageT_block(x, np_branch1, sn, 1)
    stageT_branch2_out = stageT_block(x, np_branch2, sn, 2)
    if (sn < stages):
        x = Concatenate()([stageT_branch1_out, stageT_branch2_out, stage0_out])

model = Model(img_input, [stageT_branch1_out, stageT_branch2_out])
model.load_weights(weights_path)
##openpose

#openpose
# find connection in the specified sequence, center 29 is in the position 15
limbSeq = [[2,3], [2,6], [3,4], [4,5], [6,7], [7,8], [2,9], [9,10],
           [10,11], [2,12], [12,13], [13,14], [2,1], [1,15], [15,17],
           [1,16], [16,18], [3,17], [6,18]]
# the middle joints heatmap correspondence
mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44], [19,20], [21,22],
          [23,24], [25,26], [27,28], [29,30], [47,48], [49,50], [53,54], [51,52],
          [55,56], [37,38], [45,46]]
##openpose

path = "./test0"
files = os.listdir(path)
list_image_names = []
final_results = []
num_processed_images = 0.
total_images = 30000.

for file in files:
    num_processed_images += 1
    print('file:', file)
    print('number of image:', num_processed_images)
    print('%.2f%%' % (num_processed_images / total_images * 100))
    list_image_names.append(str(file)[:-4])

    #openpose
    test_image = './test0/' + file
    oriImg = cv2.imread(test_image)

    param, model_params = config_reader()
    multiplier = [x * model_params['boxsize'] / oriImg.shape[0] for x in param['scale_search']]
    heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
    paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

    for m in range(len(multiplier)):
        scale = multiplier[m]
        imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model_params['stride'], model_params['padValue'])

        input_img = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 0, 1, 2)) / 256 - 0.5  # required shape (1, width, height, channels)
        print("Input shape: " + str(input_img.shape))

        output_blobs = model.predict(input_img)
        print("Output shape (heatmap): " + str(output_blobs[1].shape))

        # extract outputs, resize, and remove padding
        heatmap = np.squeeze(output_blobs[1])  # output 1 is heatmaps
        heatmap = cv2.resize(heatmap, (0, 0), fx=model_params['stride'], fy=model_params['stride'], interpolation=cv2.INTER_CUBIC)
        heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
        heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

        paf = np.squeeze(output_blobs[0])  # output 0 is PAFs
        paf = cv2.resize(paf, (0, 0), fx=model_params['stride'], fy=model_params['stride'], interpolation=cv2.INTER_CUBIC)
        paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
        paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

        heatmap_avg = heatmap_avg + heatmap / len(multiplier)
        paf_avg = paf_avg + paf / len(multiplier)
    ##openpose
    #openpose
    # (visualization leftover from the original demo notebook; U/V are unused below)
    U = paf_avg[:, :, 16] * -1
    V = paf_avg[:, :, 17]
    X, Y = np.meshgrid(np.arange(U.shape[1]), np.arange(U.shape[0]))
    M = np.zeros(U.shape, dtype='bool')
    M[U**2 + V**2 < 0.5 * 0.5] = True
    U = ma.masked_array(U, mask=M)
    V = ma.masked_array(V, mask=M)

    all_peaks = []
    peak_counter = 0

    for part in range(19 - 1):
        map_ori = heatmap_avg[:, :, part]
        map = gaussian_filter(map_ori, sigma=3)

        map_left = np.zeros(map.shape)
        map_left[1:, :] = map[:-1, :]
        map_right = np.zeros(map.shape)
        map_right[:-1, :] = map[1:, :]
        map_up = np.zeros(map.shape)
        map_up[:, 1:] = map[:, :-1]
        map_down = np.zeros(map.shape)
        map_down[:, :-1] = map[:, 1:]

        peaks_binary = np.logical_and.reduce((map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > param['thre1']))
        peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
        peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
        id = range(peak_counter, peak_counter + len(peaks))
        peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))]

        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)
    ##openpose
    #openpose
    connection_all = []
    special_k = []
    mid_num = 10

    for k in range(len(mapIdx)):
        score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
        candA = all_peaks[limbSeq[k][0] - 1]
        candB = all_peaks[limbSeq[k][1] - 1]
        nA = len(candA)
        nB = len(candB)
        indexA, indexB = limbSeq[k]
        if (nA != 0 and nB != 0):
            connection_candidate = []
            for i in range(nA):
                for j in range(nB):
                    vec = np.subtract(candB[j][:2], candA[i][:2])
                    # +0.1 keeps the norm away from zero for coincident peaks
                    norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1] + 0.1)
                    vec = np.divide(vec, norm)

                    startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
                                        np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                    vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
                                      for I in range(len(startend))])
                    vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
                                      for I in range(len(startend))])

                    score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                    score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(0.5 * oriImg.shape[0] / norm - 1, 0)

                    criterion1 = len(np.nonzero(score_midpts > param['thre2'])[0]) > 0.8 * len(score_midpts)
                    criterion2 = score_with_dist_prior > 0
                    if criterion1 and criterion2:
                        connection_candidate.append([i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])

            connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
            connection = np.zeros((0, 5))
            for c in range(len(connection_candidate)):
                i, j, s = connection_candidate[c][0:3]
                if (i not in connection[:, 3] and j not in connection[:, 4]):
                    connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                    if (len(connection) >= min(nA, nB)):
                        break

            connection_all.append(connection)
        else:
            special_k.append(k)
            connection_all.append([])
    ##openpose
    #openpose
    # last number in each row is the total number of parts for that person
    # the second-to-last number is the score of the overall configuration
    subset = -1 * np.ones((0, 20))
    candidate = np.array([item for sublist in all_peaks for item in sublist])

    for k in range(len(mapIdx)):
        if k not in special_k:
            partAs = connection_all[k][:, 0]
            partBs = connection_all[k][:, 1]
            indexA, indexB = np.array(limbSeq[k]) - 1

            for i in range(len(connection_all[k])):
                found = 0
                subset_idx = [-1, -1]
                for j in range(len(subset)):
                    if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                        subset_idx[found] = j
                        found += 1

                if found == 1:
                    j = subset_idx[0]
                    if (subset[j][indexB] != partBs[i]):
                        subset[j][indexB] = partBs[i]
                        subset[j][-1] += 1
                        subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                elif found == 2:  # if found 2 and disjoint, merge them
                    j1, j2 = subset_idx
                    print("found = 2")
                    membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                    if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                        subset[j1][:-2] += (subset[j2][:-2] + 1)
                        subset[j1][-2:] += subset[j2][-2:]
                        subset[j1][-2] += connection_all[k][i][2]
                        subset = np.delete(subset, j2, 0)
                    else:  # as like found == 1
                        subset[j1][indexB] = partBs[i]
                        subset[j1][-1] += 1
                        subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                # if no partA is found in the subset, create a new one
                elif not found and k < 17:
                    row = -1 * np.ones(20)
                    row[indexA] = partAs[i]
                    row[indexB] = partBs[i]
                    row[-1] = 2
                    row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                    subset = np.vstack([subset, row])

    # delete rows of subset that have too few parts
    deleteIdx = []
    for i in range(len(subset)):
        if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
            deleteIdx.append(i)
    subset = np.delete(subset, deleteIdx, axis=0)
    ##openpose

    numPersons = nPersons(subset)
    AIsubset = subset2AIsubset(subset, numPersons)
    # flatten all_peaks so the peak ids stored in AIsubset index it directly
    all_peaks_1d = all_peaks2all_peaks_1d(all_peaks)

    keys = ['image_id', 'keypoint_annotations']
    values = []
    image_id = str(file)[:-4]

    keypoint_annotations = dict(zip(listHuman(numPersons), listMultiKeypoints(all_peaks_1d, numPersons, AIsubset)))
    values.append(image_id)
    values.append(keypoint_annotations)

    d = dict(zip(keys, values))

    final_results.append(d)

print(final_results)
with open('data.json', 'w') as f:
    json.dump(final_results, f)

print(list_image_names)
```
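As a sanity check before submitting, one can verify that every entry in the generated data.json has the expected shape. This check is my addition, not part of the original script:

```python
import json

with open('data.json') as f:
    results = json.load(f)

for entry in results:
    assert set(entry) == {'image_id', 'keypoint_annotations'}
    for human, kps in entry['keypoint_annotations'].items():
        # 14 keypoints x (x, y, v) = 42 numbers per person
        assert len(kps) == 42, (entry['image_id'], human)
print('checked', len(results), 'entries')
```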