深度學習解決手寫數字的圖片識別

  • 2019 年 11 月 7 日
  • 筆記

本篇使用TensorFlow框架,利用MNIST手寫數字數據集來演示深度學習的入門概念。其訓練集共有60000個樣本(圖片和標籤),測試集有10000個樣本(本文代碼使用的read_data_sets默認會從訓練集中再劃出5000個樣本作為驗證集)。手寫數字的圖片都是尺寸為28*28的灰度圖:

我們先導入必要的庫:

import  tensorflow as tf  import tensorflow.examples.tutorials.mnist.input_data as input_data  import os

設置全連接神經網絡的參數:神經網絡的結構為784*500*10(輸入層784個節點,1層500個節點的隱藏層,輸出層10個節點)。除輸出層外,每層的激活函數都使用ReLU;最後使用tf.argmax()函數求出輸出層節點中最大值的索引,範圍0~9,該索引值即為手寫數字的估計值。

註:上述圖片僅做示意,每層節點數,以及隱藏層的層數以代碼為準

# Checkpoint location.
MODEL_SAVE_PATH = "/model_path/"
MODEL_NAME = "MNIST_model1.ckpt"

INPUT_NODE = 28 * 28  # each 28x28-pixel image is flattened into 784 input nodes
OUTPUT_NODE = 10  # ten output features, one per digit 0-9
BATCH_SIZE = 100  # number of samples per training batch
LEARNING_RATE_BASE = 0.8  # base learning rate
LEARNING_RATE_DECAY = 0.99  # learning-rate decay factor
REGULARIZATION_RATE = 0.0001  # regularization rate
TRAINING_STEPS = 30000  # total number of training steps
MOVING_AVERAGE_DECAY = 0.99  # moving-average decay factor

# Network layout 784 x 500 x 10: an input layer of 784 nodes, one hidden
# layer of 500 nodes and an output layer of 10 nodes.
layer_dimension = [INPUT_NODE, 500, OUTPUT_NODE]
# Several hidden layers are possible too, e.g.
# layer_dimension = [INPUT_NODE, 50, 100, 20, OUTPUT_NODE]
n_layers = len(layer_dimension)  # total number of layers, input layer included

前向傳播:

def inference(input_x, avg_class, reuse=True):
    """Build the forward pass of the fully connected network.

    Every layer except the last applies ReLU; the last layer returns the
    raw ``matmul + bias`` result.

    Args:
        input_x: Input tensor. Its last dimension must equal INPUT_NODE so
            that it can be multiplied with the first weight matrix.
        avg_class: ``None`` to use the variables directly, or an object
            whose ``average(var)`` method returns the tensor to use in
            place of ``var``.
        reuse: Forwarded to ``tf.variable_scope("layers", reuse=...)``.
            Pass ``False`` on the first call so the variables are created.

    Returns:
        The output tensor of the last layer.
    """
    current_layer = input_x
    in_dimension = INPUT_NODE
    last_layer = n_layers - 1
    with tf.variable_scope("layers", reuse=reuse):
        for i in range(1, n_layers):
            out_dimension = layer_dimension[i]
            # Variables are fetched by name inside a shared scope so that
            # repeated calls operate on the same weights and biases.
            weight = tf.get_variable(
                "weight_" + str(i),
                [in_dimension, out_dimension],
                initializer=tf.truncated_normal_initializer(stddev=0.1))
            bias = tf.get_variable(
                "bias_" + str(i),
                [out_dimension],
                initializer=tf.constant_initializer(0.0))

            # With reuse=True the variable already exists, so its L2 term is
            # assumed to have been registered by the call that created it.
            # Adding it again would count the same penalty twice in 'losses'.
            if reuse is not True:
                tf.add_to_collection(
                    'losses',
                    tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)(weight))

            if avg_class is None:
                w, b = weight, bias
            else:
                w, b = avg_class.average(weight), avg_class.average(bias)

            current_layer = tf.matmul(current_layer, w) + b
            if i != last_layer:
                current_layer = tf.nn.relu(current_layer)
            in_dimension = out_dimension
    return current_layer

神經網絡訓練,即反向傳播,以梯度下降算法優化各個權重W張量和各個偏置B張量。

def _plot_history(steps, losses, accs):
    """Plot the sampled loss and accuracy curves against the training step."""
    # Imported locally: plotting is optional and not needed for training.
    from matplotlib import pyplot as plt
    import matplotlib.ticker as mtick

    plt.subplot(211)
    plt.plot(steps, losses, color="red")
    plt.scatter(steps, losses, s=20, color="red")
    plt.xlabel("step")
    plt.ylabel("loss(including L2 regularization loss) on training set")

    plt.subplot(212)
    plt.plot(steps, accs, color="green")
    plt.scatter(steps, accs, s=20, color="green")
    plt.gca().yaxis.set_major_formatter(mtick.FormatStrFormatter("%.3f%%"))
    plt.xlabel("step")
    # The accuracy samples are computed on the validation feed.
    plt.ylabel("accuracy on validation set")
    plt.show()


def train(mnist):
    """Train the network, print the test accuracy, save the model and plot.

    Args:
        mnist: Dataset object providing ``train.next_batch``,
            ``train.num_examples``, ``validation.images``/``labels`` and
            ``test.images``/``labels``. Labels are expected one-hot, since
            they are reduced with ``tf.argmax(y_, 1)``.
    """
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    y = inference(x, None, reuse=False)
    global_step = tf.Variable(0, trainable=False)

    # Moving-average model: accuracy is evaluated on the averaged variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    average_y = inference(x, variable_averages, reuse=True)

    # Total loss = mean cross entropy + everything else in 'losses'.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    tf.add_to_collection('losses', cross_entropy_mean)
    loss = tf.add_n(tf.get_collection('losses'))

    # Learning rate decays in steps of mnist.train.num_examples / BATCH_SIZE.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)
    # One op that both applies the gradients and updates the averages.
    train_op = tf.group(train_step, variables_averages_op)

    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Samples collected every 200 iterations, used only for plotting.
    steps = []
    accs = []
    losses = []

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run(
                [train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            if i % 200 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                steps.append(step)
                accs.append(validate_acc * 100)
                losses.append(loss_value)

        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training steps, test accuracy using average model is %g%%" %
              (TRAINING_STEPS, test_acc * 100))

        # Graph dump for TensorBoard only; close the writer so the event
        # file is flushed to disk.
        writer = tf.summary.FileWriter("E://TensorBoard//test", sess.graph)
        writer.close()

        # Save the trained model.
        # NOTE(review): this path has no separators (they look lost in
        # publishing) and MODEL_SAVE_PATH / MODEL_NAME are unused. It is kept
        # verbatim because the restore code must use the identical string.
        saver = tf.train.Saver()
        saver.save(sess, r"E:Python36my tensorflowckpt filesmode_mnist.ckpt")

    # Optional visualisation of the training run.
    _plot_history(steps, losses, accs)

真正加載MNIST數據集,並訓練模型:

def main(argv=None):
    """Load the MNIST data with one-hot labels and run the training.

    Args:
        argv: Unused; accepted so the function can serve as the entry
            point invoked by ``tf.app.run()``.
    """
    dataset = input_data.read_data_sets(
        r"E:Python36my tensorflowMNIST_data",
        one_hot=True,
    )
    train(dataset)


if __name__ == "__main__":
    tf.app.run()

可以看出5000步以後,模型已大致收斂:

30000步迭代之後,在測試集上的準確率已高達98.5%。

After 30000 training steps, test accuracy using average model is 98.5%  

下面我們利用已訓練好的模型做預測:

import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import matplotlib.pyplot as plt

INPUT_NODE = 28 * 28
OUTPUT_NODE = 10
# Must match the layout used when the checkpoint was trained.
layer_dimension = [INPUT_NODE, 500, OUTPUT_NODE]
n_layers = len(layer_dimension)

# NOTE(review): both paths contain no separators (they look lost in
# publishing). They are kept verbatim; adjust to the real locations.
CKPT_PATH = r"E:Python36my tensorflowckpt filesmode_mnist.ckpt"
IMAGE_PATH = r"E:Python36MNIST picturetest54.jpg"


def inference(input_x, avg_class, reuse=True):
    """Build the forward pass of the fully connected network.

    Every layer except the last applies ReLU; the last layer returns the
    raw ``matmul + bias`` result.

    Args:
        input_x: Input tensor whose last dimension equals INPUT_NODE.
        avg_class: ``None`` to use the variables directly, or an object
            whose ``average(var)`` method returns the tensor to use in
            place of ``var``.
        reuse: Forwarded to ``tf.variable_scope("layers", reuse=...)``.

    Returns:
        The output tensor of the last layer.
    """
    current_layer = input_x
    in_dimension = INPUT_NODE
    with tf.variable_scope("layers", reuse=reuse):
        for i in range(1, n_layers):
            out_dimension = layer_dimension[i]
            # Fetched by name so the Saver can match them to the checkpoint.
            w = tf.get_variable("weight_" + str(i), [in_dimension, out_dimension])
            b = tf.get_variable("bias_" + str(i), [out_dimension])
            if avg_class is not None:
                w, b = avg_class.average(w), avg_class.average(b)
            current_layer = tf.matmul(current_layer, w) + b
            if i != n_layers - 1:
                current_layer = tf.nn.relu(current_layer)
            in_dimension = out_dimension
    return current_layer


# Read the picture with a context manager so the file handle is closed.
with tf.gfile.GFile(IMAGE_PATH, "rb") as image_file:
    image_rawdata = image_file.read()

# channels=1 forces a single grayscale channel, so the reshape to
# (1, INPUT_NODE) also works for a JPEG stored with three channels.
img_data = tf.image.decode_jpeg(image_rawdata, channels=1)
img_data = tf.image.convert_image_dtype(img_data, dtype=tf.float32)
input_x = tf.reshape(img_data, (1, INPUT_NODE))

# AUTO_REUSE creates the variables on this first call. They must exist
# before the Saver is built so that it knows what to restore.
y = inference(input_x, avg_class=None, reuse=tf.AUTO_REUSE)
predicted_label = tf.argmax(y, 1)

saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, CKPT_PATH)
    # One run evaluates the outputs, the label and the decoded image.
    y_value, label_value, image_data = sess.run([y, predicted_label, img_data])
    print("y: ", y_value)
    print("predict number: ", label_value)

    # Drop the channel axis to get a 2-D array for display.
    image_data_2d = image_data.reshape(image_data.shape[0], image_data.shape[1])
    plt.imshow(image_data_2d, cmap='gray')
    plt.show()

注意,要保持神經網絡的結構和訓練時的一致。我們加載已訓練好的模型,用它來預測測試集中的第54張圖:

輸出結果是:

predict number:  [6]

正確!

如果想要預測我們自己拍的照片,記得須先將照片轉化為28*28的灰度圖(與MNIST一致,黑底白字),用OpenCV實現起來很簡單,不再贅述。