Tensorflow學習筆記—人臉識別DEMO實現

  • 2019 年 10 月 6 日
  • 筆記

'''Face recognition demo on the Olivetti faces contact sheet.

Data
    A small face database: 40 people, 10 photos per person. The photos are
    grayscale (values 0-255, no RGB channels) and are all stored in one big
    image, so the image has to be cut into individual face tiles. According
    to the original notes the sheet is 1190 x 942 pixels and holds 20 x 20
    photos, so each photo is roughly
    (1190 / 20) x (942 / 20) = 57 x 47 pixels
    (approximate, because there is spacing between the photos).

Approach
    Train a CNN that classifies a face tile as one of the 40 people, in the
    same spirit as handwritten-digit recognition.
'''

# coding=utf-8
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
from PIL import Image

# Geometry of the contact sheet and of the train/valid/test split.
FACE_ROWS = 57            # pixel height of one face tile
FACE_COLS = 47            # pixel width of one face tile
GRID_SIZE = 20            # the sheet is cut into GRID_SIZE x GRID_SIZE tiles
NUM_PEOPLE = 40           # number of classes
SAMPLES_PER_PERSON = 10   # consecutive tiles belonging to one person
TRAIN_PER_PERSON = 8      # first 8 tiles train, 9th validates, 10th tests


def load_data(dataset_path):
    """Load the face sheet and split it into train/valid/test sets.

    The image at ``dataset_path`` is scaled by 1/256, cut into
    GRID_SIZE x GRID_SIZE tiles of FACE_ROWS x FACE_COLS pixels, and each
    tile is flattened into one row. Tiles are numbered row-major, and every
    run of SAMPLES_PER_PERSON consecutive tiles is labelled as one person
    (one-hot over NUM_PEOPLE classes).

    Args:
        dataset_path: path of the image file to open with PIL.

    Returns:
        A list of three ``(data, label)`` tuples: training set (320 rows),
        validation set (40 rows) and test set (40 rows). ``data`` arrays are
        float32, ``label`` arrays are float64 one-hot matrices.
    """
    # Use a context manager so the underlying file handle is released as
    # soon as the pixel data has been copied into a NumPy array.
    with Image.open(dataset_path) as img:
        img_ndarray = np.asarray(img, dtype='float64') / 256

    pixels_per_face = FACE_ROWS * FACE_COLS
    total_faces = NUM_PEOPLE * SAMPLES_PER_PERSON

    # One flattened FACE_ROWS * FACE_COLS tile per row.
    faces = np.empty((total_faces, pixels_per_face))
    for row in range(GRID_SIZE):
        for column in range(GRID_SIZE):
            tile = img_ndarray[
                row * FACE_ROWS: (row + 1) * FACE_ROWS,
                column * FACE_COLS: (column + 1) * FACE_COLS
            ]
            faces[GRID_SIZE * row + column] = tile.flatten()

    # One-hot labels: tiles [10 * p, 10 * p + 10) belong to person p.
    label = np.zeros((total_faces, NUM_PEOPLE))
    for person in range(NUM_PEOPLE):
        first = person * SAMPLES_PER_PERSON
        label[first: first + SAMPLES_PER_PERSON, person] = 1

    # Split the data into training, validation and test sets.
    train_data = np.empty((NUM_PEOPLE * TRAIN_PER_PERSON, pixels_per_face))
    train_label = np.zeros((NUM_PEOPLE * TRAIN_PER_PERSON, NUM_PEOPLE))
    valid_data = np.empty((NUM_PEOPLE, pixels_per_face))
    valid_label = np.zeros((NUM_PEOPLE, NUM_PEOPLE))
    test_data = np.empty((NUM_PEOPLE, pixels_per_face))
    test_label = np.zeros((NUM_PEOPLE, NUM_PEOPLE))

    for person in range(NUM_PEOPLE):
        src = person * SAMPLES_PER_PERSON
        dst = person * TRAIN_PER_PERSON

        train_data[dst: dst + TRAIN_PER_PERSON] = faces[src: src + TRAIN_PER_PERSON]
        train_label[dst: dst + TRAIN_PER_PERSON] = label[src: src + TRAIN_PER_PERSON]

        valid_data[person] = faces[src + 8]
        valid_label[person] = label[src + 8]

        test_data[person] = faces[src + 9]
        test_label[person] = label[src + 9]

    train_data = train_data.astype('float32')
    valid_data = valid_data.astype('float32')
    test_data = test_data.astype('float32')

    return [
        (train_data, train_label),
        (valid_data, valid_label),
        (test_data, test_label)
    ]


def convolutional_layer(data, kernel_size, bias_size, pooling_size):
    """Build one conv -> ReLU -> max-pool stage in the current variable scope.

    Args:
        data: input tensor passed to ``tf.nn.conv2d``.
        kernel_size: shape of the convolution kernel variable.
        bias_size: shape of the bias variable.
        pooling_size: used as both ``ksize`` and ``strides`` of the max pool.

    Returns:
        The pooled output tensor.
    """
    kernel = tf.get_variable("conv", kernel_size, initializer=tf.random_normal_initializer())
    bias = tf.get_variable('bias', bias_size, initializer=tf.random_normal_initializer())

    conv = tf.nn.conv2d(data, kernel, strides=[1, 1, 1, 1], padding='SAME')
    linear_output = tf.nn.relu(tf.add(conv, bias))
    pooling = tf.nn.max_pool(linear_output, ksize=pooling_size, strides=pooling_size, padding="SAME")
    return pooling


def linear_layer(data, weights_size, biases_size):
    """Build a fully connected layer ``data @ weights + biases`` (no activation).

    Args:
        data: 2-D input tensor.
        weights_size: shape of the weight matrix variable.
        biases_size: shape of the bias variable.

    Returns:
        The affine output tensor.
    """
    # NOTE: the variable name "weigths" is misspelled but is kept on purpose;
    # renaming it would change the variable names stored in checkpoints.
    weights = tf.get_variable("weigths", weights_size, initializer=tf.random_normal_initializer())
    biases = tf.get_variable("biases", biases_size, initializer=tf.random_normal_initializer())
    return tf.add(tf.matmul(data, weights), biases)


def convolutional_neural_network(data):
    """Build the CNN: two conv/pool stages, one hidden dense layer, one output layer.

    Args:
        data: tensor of flattened faces; it is reshaped to
            ``[-1, FACE_ROWS, FACE_COLS, 1]``.

    Returns:
        The un-normalised class scores (logits), one column per person.
    """
    # The number of output neurons equals the number of classes.
    n_ouput_layer = NUM_PEOPLE

    kernel_shape1 = [5, 5, 1, 32]
    kernel_shape2 = [5, 5, 32, 64]
    # 15 x 12 is the 57 x 47 tile after two stride-2 'SAME' poolings.
    full_conn_w_shape = [15 * 12 * 64, 1024]
    out_w_shape = [1024, n_ouput_layer]

    bias_shape1 = [32]
    bias_shape2 = [64]
    full_conn_b_shape = [1024]
    out_b_shape = [n_ouput_layer]

    data = tf.reshape(data, [-1, FACE_ROWS, FACE_COLS, 1])

    # After the first conv stage the tensor shape is [batch, 29, 24, 32].
    with tf.variable_scope("conv_layer1"):
        layer1_output = convolutional_layer(
            data=data,
            kernel_size=kernel_shape1,
            bias_size=bias_shape1,
            pooling_size=[1, 2, 2, 1]
        )
    # After the second conv stage the tensor shape is [batch, 15, 12, 64].
    with tf.variable_scope("conv_layer2"):
        layer2_output = convolutional_layer(
            data=layer1_output,
            kernel_size=kernel_shape2,
            bias_size=bias_shape2,
            pooling_size=[1, 2, 2, 1]
        )
    with tf.variable_scope("full_connection"):
        # Flatten the conv output so each sample becomes one feature vector.
        layer2_output_flatten = tf.contrib.layers.flatten(layer2_output)
        layer3_output = tf.nn.relu(
            linear_layer(
                data=layer2_output_flatten,
                weights_size=full_conn_w_shape,
                biases_size=full_conn_b_shape
            )
        )
    with tf.variable_scope("output"):
        output = linear_layer(
            data=layer3_output,
            weights_size=out_w_shape,
            biases_size=out_b_shape
        )

    return output


def train_facedata(dataset, model_dir, model_path, dataset_path="olivettifaces.gif"):
    """Train (or reload) the CNN, then report validation and test results.

    If ``model_path + ".index"`` does not exist, the network is trained for
    20 epochs and a checkpoint is written every time the summed epoch loss
    improves. The checkpoint at ``model_path`` is then restored, validation
    accuracy is printed, every misclassified test sample is printed, and the
    misclassified tiles are outlined on the sheet image.

    Args:
        dataset: ``[(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]``
            as returned by ``load_data``. The placeholders use a fixed batch
            size of 40, so the validation and test sets must hold 40 rows.
        model_dir: directory that holds the checkpoint; created if missing.
        model_path: checkpoint path prefix used for saving and restoring.
        dataset_path: sheet image on which misclassified test tiles are
            drawn. Defaults to the file name that used to be hard-coded here.
    """
    batch_size = 40

    train_set_x, train_set_y = dataset[0][0], dataset[0][1]
    valid_set_x, valid_set_y = dataset[1][0], dataset[1][1]
    test_set_x, test_set_y = dataset[2][0], dataset[2][1]

    X = tf.placeholder(tf.float32, [batch_size, FACE_ROWS * FACE_COLS])
    Y = tf.placeholder(tf.float32, [batch_size, NUM_PEOPLE])

    predict = convolutional_neural_network(X)
    cost_func = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predict, labels=Y))
    optimizer = tf.train.AdamOptimizer(1e-2).minimize(cost_func)

    # Used to save the best model seen during training.
    saver = tf.train.Saver()
    with tf.Session() as session:
        # Train only when no saved model exists yet.
        if not os.path.exists(model_path + ".index"):
            session.run(tf.global_variables_initializer())
            best_loss = float('Inf')
            # Integer division: a trailing partial batch is dropped.
            num_batches = np.shape(train_set_x)[0] // batch_size
            for epoch in range(20):
                epoch_loss = 0
                for i in range(num_batches):
                    x = train_set_x[i * batch_size: (i + 1) * batch_size]
                    y = train_set_y[i * batch_size: (i + 1) * batch_size]
                    _, cost = session.run([optimizer, cost_func], feed_dict={X: x, Y: y})
                    epoch_loss += cost

                print(epoch, ' : ', epoch_loss)
                if best_loss > epoch_loss:
                    best_loss = epoch_loss
                    if not os.path.exists(model_dir):
                        # makedirs also creates missing parent directories.
                        os.makedirs(model_dir)
                        print("create the directory: %s" % model_dir)
                    save_path = saver.save(session, model_path)
                    print("Model saved in file: %s" % save_path)

        # Restore the saved weights, then validate and test.
        saver.restore(session, model_path)
        correct = tf.equal(tf.argmax(predict, 1), tf.argmax(Y, 1))
        valid_accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('valid set accuracy: ', valid_accuracy.eval({X: valid_set_x, Y: valid_set_y}))

        test_pred = tf.argmax(predict, 1).eval({X: test_set_x})
        test_true = np.argmax(test_set_y, 1)
        test_correct = correct.eval({X: test_set_x, Y: test_set_y})
        incorrect_index = [i for i, is_right in enumerate(test_correct) if not is_right]
        for i in incorrect_index:
            print('picture person is %i, but mis-predicted as person %i'
                  % (test_true[i], test_pred[i]))
        plot_errordata(incorrect_index, dataset_path)


def plot_errordata(error_index, dataset_path):
    """Outline the misclassified test tiles on the sheet image.

    Test sample ``i`` is the 10th photo of person ``i``. With 20 tiles per
    sheet row (two people per row) that tile sits in sheet row ``i // 2`` and
    in tile column 9 for even ``i`` or 19 for odd ``i``.

    Args:
        error_index: indices into the test set that were misclassified.
        dataset_path: image file to display underneath the rectangles.

    Side effects:
        Saves the figure to ``result.png`` and shows it.
    """
    img = mpimg.imread(dataset_path)
    plt.imshow(img)
    currentAxis = plt.gca()
    for index in error_index:
        row = index // 2
        column = index % 2
        tile_column = 9 if column == 0 else 19
        currentAxis.add_patch(
            patches.Rectangle(
                xy=(FACE_COLS * tile_column, row * FACE_ROWS),
                width=FACE_COLS,
                height=FACE_ROWS,
                linewidth=1,
                edgecolor='r',
                facecolor='none'
            )
        )
    plt.savefig("result.png")
    plt.show()


def main():
    """Load the face sheet, then train/evaluate the model under ./model."""
    dataset_path = "olivettifaces.gif"
    data = load_data(dataset_path)
    model_dir = './model'
    model_path = model_dir + '/best.ckpt'
    train_facedata(data, model_dir, model_path, dataset_path)


if __name__ == "__main__":
    main()