(二)Tensorflow搭建卷积神经网络实现MNIST手写字体识别及预测

原创

xdq101

修改于 2019-06-03 10:49:19

7410

修改于 2019-06-03 10:49:19

文章被收录于专栏：人工智能项目

1 搭建卷积神经网络

1.1 网络结构

1.2 网络分析

序号	网络层	描述
1	卷积层	一张原始图像(28, 28, 1),batch=1,经过卷积处理,得到图像特征(28, 28, 32)
2	下采样	即池化层,最大池化后图像特征(14, 14, 32)
3	卷积层	将池化特征(14, 14, 32)卷积处理后,得到图像特征(14, 14, 64)
4	下采样	最大池化,得到图像特征(7, 7, 64)
5	全连接层	将上一层即池化层的图像特征经过矩阵内积计算,拉成一个向量(7×7×64=3136),特征为(1, 3136)
6	全连接层	继续矩阵计算,得到特征为(1, 512)
7	全连接层	继续矩阵计算,得到特征(1, 10)

2 网络结构

2.1 网络结构可视化

2.2 网络结构-源码

def conv2d(input_tensor, ksize, strides, pad, name_w, name_b):
    """Build a 2-D convolution layer followed by a ReLU activation.

    Args:
        input_tensor: Tensor passed to ``tf.nn.conv2d`` as its input.
        ksize: Shape of the filter variable; its last entry is also used
            as the length of the bias vector.
        strides: Strides forwarded to ``tf.nn.conv2d``.
        pad: Padding mode forwarded to ``tf.nn.conv2d``.
        name_w: Name of the filter variable created via ``tf.get_variable``.
        name_b: Name of the bias variable created via ``tf.get_variable``.

    Returns:
        The ReLU-activated convolution output tensor.
    """
    kernel = tf.get_variable(
        name=name_w,
        shape=ksize,
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=0.1),
    )
    bias = tf.get_variable(
        name=name_b,
        shape=[ksize[-1]],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.1),
    )
    feature_map = tf.nn.conv2d(input_tensor, kernel, strides=strides, padding=pad)
    return tf.nn.relu(feature_map + bias)

def max_pooling(input_tensor, ksize, strides, pad):
    """Apply ``tf.nn.max_pool`` to ``input_tensor`` and return the result.

    Args:
        input_tensor: Tensor to pool.
        ksize: Pooling window size forwarded to ``tf.nn.max_pool``.
        strides: Strides forwarded to ``tf.nn.max_pool``.
        pad: Padding mode forwarded to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    return tf.nn.max_pool(input_tensor, ksize=ksize, strides=strides, padding=pad)

def fullc(input_tensor, wsize, name_w, name_b):
    """Build a fully connected (affine) layer without an activation.

    Args:
        input_tensor: Tensor multiplied on the left of the weight matrix.
        wsize: Shape of the weight variable; its last entry is also used
            as the length of the bias vector.
        name_w: Name of the weight variable created via ``tf.get_variable``.
        name_b: Name of the bias variable created via ``tf.get_variable``.

    Returns:
        ``tf.matmul(input_tensor, weights) + biases``.
    """
    weight_matrix = tf.get_variable(
        name=name_w,
        shape=wsize,
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=0.1),
    )
    bias = tf.get_variable(
        name=name_b,
        shape=[wsize[-1]],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.1),
    )
    return tf.matmul(input_tensor, weight_matrix) + bias
def inference(inputs, keep_prob):
    """Assemble the network: two conv/max-pool stages, then two dense layers.

    Args:
        inputs: Image tensor fed to the first convolution (its filter shape
            ``[5, 5, 1, 32]`` expects a single input channel).
        keep_prob: Keep probability forwarded to ``tf.nn.dropout`` on the
            first dense layer's output.

    Returns:
        The output of the second dense layer (10 units, no activation).
    """
    unit_stride = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]  # used as both window size and stride

    with tf.name_scope("conv_1"):
        conv_1 = conv2d(inputs, [5, 5, 1, 32], unit_stride, "SAME", "cw_1", "cb_1")
    with tf.name_scope("max_pool_1"):
        pooling_1 = max_pooling(conv_1, pool_window, pool_window, "SAME")
    with tf.name_scope("conv_2"):
        conv_2 = conv2d(pooling_1, [5, 5, 32, 64], unit_stride, "SAME", "cw_2", "cb_2")
    with tf.name_scope("max_pool_2"):
        pooling_2 = max_pooling(conv_2, pool_window, pool_window, "SAME")

    # Collapse every non-batch axis into one so the dense layer can consume it.
    pooled_shape = pooling_2.get_shape()
    flat_size = pooled_shape[1].value * pooled_shape[2].value * pooled_shape[3].value
    flattened = tf.reshape(pooling_2, [-1, flat_size])

    with tf.name_scope("fc_1"):
        # NOTE(review): fullc() applies no activation, so only dropout sits
        # between fc_1 and fc_2 -- confirm whether a ReLU was intended here.
        hidden = tf.nn.dropout(fullc(flattened, [flat_size, 512], "fw_1", "fb_1"), keep_prob)
    with tf.name_scope("fc_2"):
        logits = fullc(hidden, [512, 10], "fw_2", "fb_2")
    return logits

3 训练及测试

3.1 载入数据

# Read the MNIST data set from ./mnist_data with one-hot encoded labels.
mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
# Draw one training batch of BATCH_SIZE examples.
img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
# Give the images the (batch, 28, 28, 1) layout the input placeholder expects.
img_inputs = img_inputs.reshape(BATCH_SIZE, 28, 28, 1)

3.2 训练及保存模型

def train_new(mnist):
    """Train the network with plain gradient descent, logging and checkpointing.

    Builds the graph (placeholders, ``inference``, ``loss_cal``,
    ``evaluation``), then runs ``TRAINING_STEPS`` optimisation steps. A
    summary is written to ``LOG_DIR`` on every step; every 10 steps the
    loss/accuracy are printed and a checkpoint is saved under ``MODEL_PATH``.

    Note that ``inference`` is built with ``keep_prob=0.5``, so the reported
    accuracy is measured with dropout active.

    Args:
        mnist: Data set object whose ``train.next_batch(BATCH_SIZE)`` returns
            an ``(images, labels)`` pair.
    """
    inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
    labels = tf.placeholder(tf.float32, [None, 10], name="label-outputs")
    prediction = inference(inputs, 0.5)
    loss = loss_cal(prediction, labels)
    accuracy = evaluation(prediction, labels)
    summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)
    saver = tf.train.Saver()
    checkpoint_path = os.path.join(MODEL_PATH, MODEL_NAME)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
        try:
            for i in range(TRAINING_STEPS):
                img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
                img_inputs = np.reshape(img_inputs, (BATCH_SIZE, 28, 28, 1))
                _, loss_value, acc, summary = sess.run(
                    [train_step, loss, accuracy, summary_op],
                    feed_dict={inputs: img_inputs, labels: img_labels},
                )
                if i % 10 == 0:
                    print("After {} training steps, loss is {}, accuracy: {}".format(i, loss_value, acc))
                    saver.save(sess, checkpoint_path)
                summary_writer.add_summary(summary, i)
        finally:
            # Flush pending events and release the file handle even if
            # training is interrupted; the original never closed the writer.
            summary_writer.close()

4 载入模型及预测

def load_model_only_with_params():
    """Rebuild the network, restore saved weights and classify one test image.

    The graph is reconstructed in code inside a fresh ``tf.Graph``; only the
    variable values are read from the latest checkpoint in ``./conv_models``.
    The first MNIST test image is classified and the predicted and true
    digits are printed.

    Raises:
        IOError: If no checkpoint can be found in ``./conv_models``.
    """
    g_params = tf.Graph()
    with g_params.as_default():
        inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
        # keep_prob must be 1.0 at prediction time: with 0.5 dropout would
        # randomly zero half of the hidden units and make the output random.
        prediction = inference(inputs, 1.0)
    mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
    img = np.reshape(mnist.test.images[0], (1, 28, 28, 1))
    img_label = np.argmax(mnist.test.labels[0])
    with tf.Session(graph=g_params) as sess:
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state("./conv_models")
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise IOError("No checkpoint found in ./conv_models; train the model first.")
        saver.restore(sess, ckpt.model_checkpoint_path)
        pre = sess.run(prediction, feed_dict={inputs: img})
        # `pre` is already a NumPy array, so take the argmax directly instead
        # of adding a new op to the graph and running the session again.
        pre_num = np.argmax(pre, 1)
        print("prediction: {}, real: {}".format(pre_num[0], img_label))

prediction: 7, real: 7

5 完整程序

import tensorflow as tf 
from tensorflow.examples.tutorials.mnist import input_data
import os
import numpy as np


# --- Hyper-parameters read by train() -------------------------------------
LEARNING_RATE_BASE = 0.8        # initial rate for tf.train.exponential_decay
LEARNING_RATE_DECAY = 0.99      # decay rate for tf.train.exponential_decay
MOVING_AVERAGE_DECAY = 0.99     # decay for tf.train.ExponentialMovingAverage

# --- Hyper-parameters read by train_new() ---------------------------------
LEARNING_RATE = 0.0001          # fixed gradient-descent learning rate

# --- Shared settings ------------------------------------------------------
TRAINING_STEPS = 30000          # number of optimisation steps
BATCH_SIZE = 100                # examples drawn per step
REGULARAZTION_RATE = 0.0001     # not referenced by any function in this file

# --- Output locations -----------------------------------------------------
MODEL_PATH = "./conv_models"    # checkpoint directory
MODEL_NAME = "conv_model.ckpt"  # checkpoint file name
LOG_DIR = "./logs"              # summary (event file) directory

# Make sure the checkpoint directory exists before any save is attempted.
if not os.path.exists(MODEL_PATH):
    os.makedirs(MODEL_PATH)

def conv2d(input_tensor, ksize, strides, pad, name_w, name_b):
    """Build a 2-D convolution layer followed by a ReLU activation.

    Args:
        input_tensor: Tensor passed to ``tf.nn.conv2d`` as its input.
        ksize: Shape of the filter variable; its last entry is also used
            as the length of the bias vector.
        strides: Strides forwarded to ``tf.nn.conv2d``.
        pad: Padding mode forwarded to ``tf.nn.conv2d``.
        name_w: Name of the filter variable created via ``tf.get_variable``.
        name_b: Name of the bias variable created via ``tf.get_variable``.

    Returns:
        The ReLU-activated convolution output tensor.
    """
    kernel = tf.get_variable(
        name=name_w,
        shape=ksize,
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=0.1),
    )
    bias = tf.get_variable(
        name=name_b,
        shape=[ksize[-1]],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.1),
    )
    feature_map = tf.nn.conv2d(input_tensor, kernel, strides=strides, padding=pad)
    return tf.nn.relu(feature_map + bias)

def max_pooling(input_tensor, ksize, strides, pad):
    """Apply ``tf.nn.max_pool`` to ``input_tensor`` and return the result.

    Args:
        input_tensor: Tensor to pool.
        ksize: Pooling window size forwarded to ``tf.nn.max_pool``.
        strides: Strides forwarded to ``tf.nn.max_pool``.
        pad: Padding mode forwarded to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    return tf.nn.max_pool(input_tensor, ksize=ksize, strides=strides, padding=pad)

def fullc(input_tensor, wsize, name_w, name_b):
    """Build a fully connected (affine) layer without an activation.

    Args:
        input_tensor: Tensor multiplied on the left of the weight matrix.
        wsize: Shape of the weight variable; its last entry is also used
            as the length of the bias vector.
        name_w: Name of the weight variable created via ``tf.get_variable``.
        name_b: Name of the bias variable created via ``tf.get_variable``.

    Returns:
        ``tf.matmul(input_tensor, weights) + biases``.
    """
    weight_matrix = tf.get_variable(
        name=name_w,
        shape=wsize,
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=0.1),
    )
    bias = tf.get_variable(
        name=name_b,
        shape=[wsize[-1]],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.1),
    )
    return tf.matmul(input_tensor, weight_matrix) + bias

def inference(inputs, keep_prob):
    """Assemble the network: two conv/max-pool stages, then two dense layers.

    Args:
        inputs: Image tensor fed to the first convolution (its filter shape
            ``[5, 5, 1, 32]`` expects a single input channel).
        keep_prob: Keep probability forwarded to ``tf.nn.dropout`` on the
            first dense layer's output.

    Returns:
        The output of the second dense layer (10 units, no activation).
    """
    unit_stride = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]  # used as both window size and stride

    with tf.name_scope("conv_1"):
        conv_1 = conv2d(inputs, [5, 5, 1, 32], unit_stride, "SAME", "cw_1", "cb_1")
    with tf.name_scope("max_pool_1"):
        pooling_1 = max_pooling(conv_1, pool_window, pool_window, "SAME")
    with tf.name_scope("conv_2"):
        conv_2 = conv2d(pooling_1, [5, 5, 32, 64], unit_stride, "SAME", "cw_2", "cb_2")
    with tf.name_scope("max_pool_2"):
        pooling_2 = max_pooling(conv_2, pool_window, pool_window, "SAME")

    # Collapse every non-batch axis into one so the dense layer can consume it.
    pooled_shape = pooling_2.get_shape()
    flat_size = pooled_shape[1].value * pooled_shape[2].value * pooled_shape[3].value
    flattened = tf.reshape(pooling_2, [-1, flat_size])

    with tf.name_scope("fc_1"):
        # NOTE(review): fullc() applies no activation, so only dropout sits
        # between fc_1 and fc_2 -- confirm whether a ReLU was intended here.
        hidden = tf.nn.dropout(fullc(flattened, [flat_size, 512], "fw_1", "fb_1"), keep_prob)
    with tf.name_scope("fc_2"):
        logits = fullc(hidden, [512, 10], "fw_2", "fb_2")
    return logits

def loss_cal(prediction, labels):
    """Compute the mean softmax cross-entropy loss and register a summary.

    Args:
        prediction: Logits tensor.
        labels: Label tensor; ``tf.argmax(labels, 1)`` converts it to class
            indices for the sparse cross-entropy op.

    Returns:
        Scalar tensor holding the batch-mean cross-entropy. A scalar summary
        named ``"loss"`` is added as a side effect.
    """
    with tf.name_scope("loss"):
        class_ids = tf.argmax(labels, 1)
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction,
            labels=class_ids,
        )
        mean_loss = tf.reduce_mean(per_example)
        tf.summary.scalar("loss", mean_loss)
        return mean_loss


def evaluation(logits, labels):
    """Compute the fraction of correct predictions and register a summary.

    Args:
        logits: Network output tensor; the arg-max along axis 1 is the
            predicted class.
        labels: Label tensor; the arg-max along axis 1 is the true class.

    Returns:
        Scalar tensor with the mean of the per-example hit indicator. A
        scalar summary named ``"accuracy"`` is added as a side effect.
    """
    with tf.name_scope("accuracy"):
        predicted_ids = tf.argmax(logits, 1)
        true_ids = tf.argmax(labels, 1)
        hits = tf.cast(tf.equal(predicted_ids, true_ids), tf.float32)
        mean_hits = tf.reduce_mean(hits)
        tf.summary.scalar("accuracy", mean_hits)
        return mean_hits

def train(mnist):
    """Train with an exponentially decayed learning rate and weight averaging.

    Builds the graph, then runs ``TRAINING_STEPS`` steps. Each step applies
    a gradient-descent update and refreshes the exponential moving averages
    of the trainable variables. Every 10 steps the loss is printed and a
    checkpoint tagged with the global step is saved under ``MODEL_PATH``.

    Args:
        mnist: Data set object providing ``train.next_batch(BATCH_SIZE)``
            and ``train.num_examples``.
    """
    inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
    labels = tf.placeholder(tf.float32, [None, 10], name="label-outputs")
    global_step = tf.Variable(0, trainable=False)

    # Build the network BEFORE asking for the trainable variables. The
    # original called apply(tf.trainable_variables()) first, when the list was
    # still empty, so the moving average never tracked any weights.
    prediction = inference(inputs, 0.5)

    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=tf.argmax(labels, 1))
    loss = tf.reduce_mean(cross_entropy)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, mnist.train.num_examples/BATCH_SIZE, LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    # Group the gradient update and the moving-average update into one op.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name="train")

    saver = tf.train.Saver()
    checkpoint_path = os.path.join(MODEL_PATH, MODEL_NAME)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for i in range(TRAINING_STEPS):
            img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
            img_inputs = np.reshape(img_inputs, (BATCH_SIZE, 28, 28, 1))
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={inputs: img_inputs, labels: img_labels})

            if i % 10 == 0:
                print("After {} training steps, loss is {}".format(step, loss_value))
                saver.save(sess, checkpoint_path, global_step=global_step)

def train_new(mnist):
    """Train the network with plain gradient descent, logging and checkpointing.

    Builds the graph (placeholders, ``inference``, ``loss_cal``,
    ``evaluation``), then runs ``TRAINING_STEPS`` optimisation steps. A
    summary is written to ``LOG_DIR`` on every step; every 10 steps the
    loss/accuracy are printed and a checkpoint is saved under ``MODEL_PATH``.

    Note that ``inference`` is built with ``keep_prob=0.5``, so the reported
    accuracy is measured with dropout active.

    Args:
        mnist: Data set object whose ``train.next_batch(BATCH_SIZE)`` returns
            an ``(images, labels)`` pair.
    """
    inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
    labels = tf.placeholder(tf.float32, [None, 10], name="label-outputs")
    prediction = inference(inputs, 0.5)
    loss = loss_cal(prediction, labels)
    accuracy = evaluation(prediction, labels)
    summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)
    saver = tf.train.Saver()
    checkpoint_path = os.path.join(MODEL_PATH, MODEL_NAME)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
        try:
            for i in range(TRAINING_STEPS):
                img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
                img_inputs = np.reshape(img_inputs, (BATCH_SIZE, 28, 28, 1))
                _, loss_value, acc, summary = sess.run(
                    [train_step, loss, accuracy, summary_op],
                    feed_dict={inputs: img_inputs, labels: img_labels},
                )
                if i % 10 == 0:
                    print("After {} training steps, loss is {}, accuracy: {}".format(i, loss_value, acc))
                    saver.save(sess, checkpoint_path)
                summary_writer.add_summary(summary, i)
        finally:
            # Flush pending events and release the file handle even if
            # training is interrupted; the original never closed the writer.
            summary_writer.close()

def load_model_only_with_params():
    """Rebuild the network, restore saved weights and classify one test image.

    The graph is reconstructed in code inside a fresh ``tf.Graph``; only the
    variable values are read from the latest checkpoint in ``./conv_models``.
    The first MNIST test image is classified and the predicted and true
    digits are printed.

    Raises:
        IOError: If no checkpoint can be found in ``./conv_models``.
    """
    g_params = tf.Graph()
    with g_params.as_default():
        inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
        # keep_prob must be 1.0 at prediction time: with 0.5 dropout would
        # randomly zero half of the hidden units and make the output random.
        prediction = inference(inputs, 1.0)
    mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
    img = np.reshape(mnist.test.images[0], (1, 28, 28, 1))
    img_label = np.argmax(mnist.test.labels[0])
    with tf.Session(graph=g_params) as sess:
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state("./conv_models")
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise IOError("No checkpoint found in ./conv_models; train the model first.")
        saver.restore(sess, ckpt.model_checkpoint_path)
        pre = sess.run(prediction, feed_dict={inputs: img})
        # `pre` is already a NumPy array, so take the argmax directly instead
        # of adding a new op to the graph and running the session again.
        pre_num = np.argmax(pre, 1)
        print("prediction: {}, real: {}".format(pre_num[0], img_label))

def main(argv=None):
    """Program entry point: load MNIST and train the network.

    Args:
        argv: Command-line arguments passed in by ``tf.app.run``; unused.
    """
    # The original line ended with a stray "." (a syntax error) and the lines
    # below mixed tabs with spaces; both are fixed here.
    mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
    # Train the model.
    train_new(mnist)
    # To predict from a saved checkpoint instead, call:
    # load_model_only_with_params()

if __name__ == "__main__":
    # Hand control to TensorFlow's app runner, which invokes main().
    tf.app.run(main=main)