前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >(二)Tensorflow搭建卷积神经网络实现MNIST手写字体识别及预测

(二)Tensorflow搭建卷积神经网络实现MNIST手写字体识别及预测

原创
作者头像
xdq101
修改2019-06-03 10:49:19
7080
修改2019-06-03 10:49:19
举报
文章被收录于专栏:人工智能项目人工智能项目

1 搭建卷积神经网络

1.0 网络结构

图1.0 卷积网络结构
图1.0 卷积网络结构

1.2 网络分析

序号

网络层

描述

1

卷积层

一张原始图像(28, 28, 1),batch=1,经过卷积处理,得到图像特征(28, 28, 32)

2

下采样

即池化层,最大池化后图像特征(14, 14, 32)

3

卷积层

将池化特征(14, 14, 32)卷积处理后,得到图像特征(14, 14, 64)

4

下采样

最大池化,得到图像特征(7, 7, 64)

5

全连接层

将上一层即池化层的图像特征经过矩阵内积计算,拉成一个向量(7×7×64=3136),特征为(1, 3136)

6

全连接层

继续矩阵计算,得到特征为(1, 512)

7

全连接

高斯矩阵计算,得到特征(1, 10)

2 网络结构

2.1 网络结构可视化

图2.1 网络结构
图2.1 网络结构

2.2 网络结构-源

代码语言:txt
复制
def conv2d(input_tensor, ksize, strides, pad, name_w, name_b):
    """Convolve ``input_tensor``, add a bias, and apply ReLU.

    Args:
        input_tensor: Tensor passed to ``tf.nn.conv2d``.
        ksize: Shape of the filter variable; its last entry sizes the bias.
        strides: Strides forwarded to ``tf.nn.conv2d``.
        pad: Padding mode forwarded to ``tf.nn.conv2d``.
        name_w: Variable name used for the filter.
        name_b: Variable name used for the bias.

    Returns:
        The ReLU-activated result of the biased convolution.
    """
    kernel_init = tf.truncated_normal_initializer(stddev=0.1)
    kernel = tf.get_variable(name=name_w, shape=ksize, dtype=tf.float32, initializer=kernel_init)
    offset_init = tf.constant_initializer(0.1)
    offset = tf.get_variable(name=name_b, shape=[ksize[-1]], dtype=tf.float32, initializer=offset_init)
    response = tf.nn.conv2d(input_tensor, kernel, strides=strides, padding=pad)
    return tf.nn.relu(response + offset)

def max_pooling(input_tensor, ksize, strides, pad):
    """Max-pool ``input_tensor`` with the given window, strides and padding.

    All arguments are forwarded unchanged to ``tf.nn.max_pool``; the pooled
    tensor is returned.
    """
    return tf.nn.max_pool(input_tensor, ksize=ksize, strides=strides, padding=pad)

def fullc(input_tensor, wsize, name_w, name_b):
    """Fully connected layer without activation: ``input_tensor @ W + b``.

    Args:
        input_tensor: Tensor multiplied by the weight matrix.
        wsize: Shape of the weight variable; its last entry sizes the bias.
        name_w: Variable name used for the weights.
        name_b: Variable name used for the bias.

    Returns:
        The affine transform of ``input_tensor`` (no non-linearity applied).
    """
    weight_init = tf.truncated_normal_initializer(stddev=0.1)
    matrix = tf.get_variable(name=name_w, shape=wsize, dtype=tf.float32, initializer=weight_init)
    offset_init = tf.constant_initializer(0.1)
    offset = tf.get_variable(name=name_b, shape=[wsize[-1]], dtype=tf.float32, initializer=offset_init)
    return tf.matmul(input_tensor, matrix) + offset
def inference(inputs, keep_prob):
    """Build the conv -> pool -> conv -> pool -> fc -> dropout -> fc graph.

    Args:
        inputs: 4-D image tensor whose last dimension is 1 (the first filter
            is declared with one input channel).
        keep_prob: Keep probability passed to ``tf.nn.dropout`` after the
            first fully connected layer.

    Returns:
        The output of the last fully connected layer (10 values per example,
        no softmax applied).
    """
    unit_stride = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]

    with tf.name_scope("conv_1"):
        features = conv2d(inputs, [5, 5, 1, 32], unit_stride, "SAME", "cw_1", "cb_1")
    with tf.name_scope("max_pool_1"):
        features = max_pooling(features, pool_window, pool_window, "SAME")
    with tf.name_scope("conv_2"):
        features = conv2d(features, [5, 5, 32, 64], unit_stride, "SAME", "cw_2", "cb_2")
    with tf.name_scope("max_pool_2"):
        features = max_pooling(features, pool_window, pool_window, "SAME")

    # Collapse height x width x channels into one axis for the dense layers.
    _, height, width, channels = features.get_shape().as_list()
    flat_size = height * width * channels
    flat = tf.reshape(features, [-1, flat_size])

    with tf.name_scope("fc_1"):
        hidden = fullc(flat, [flat_size, 512], "fw_1", "fb_1")
        hidden = tf.nn.dropout(hidden, keep_prob)
    with tf.name_scope("fc_2"):
        logits = fullc(hidden, [512, 10], "fw_2", "fb_2")
    return logits

3 训练及测试

3.1 载入数据

代码语言:txt
复制
# Load MNIST from ./mnist_data with one-hot labels and draw one training batch.
mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
# Give the batch the (batch, 28, 28, 1) layout expected by the input placeholder.
img_inputs = img_inputs.reshape((BATCH_SIZE, 28, 28, 1))

3.2 训练及保存模型

代码语言:txt
复制
def train_new(mnist):
    """Train the CNN with plain gradient descent, logging summaries and saving checkpoints.

    Builds the graph (placeholders, ``inference``, loss, accuracy, merged
    summaries, optimizer), then runs ``TRAINING_STEPS`` mini-batch updates.
    Every 10th step the loss/accuracy are printed and a checkpoint is written
    to ``MODEL_PATH/MODEL_NAME``; a summary is written to ``LOG_DIR`` on every
    step.

    Args:
        mnist: Dataset object providing ``train.next_batch(batch_size)``.
    """
    inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
    labels = tf.placeholder(tf.float32, [None, 10], name="label-outputs")
    # Dropout keep probability is fixed at 0.5, so the reported accuracy is
    # measured with dropout active.
    prediction = inference(inputs, 0.5)
    loss = loss_cal(prediction, labels)
    accuracy = evaluation(prediction, labels)
    summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)
    saver = tf.train.Saver()
    checkpoint_path = os.path.join(MODEL_PATH, MODEL_NAME)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
        try:
            for i in range(TRAINING_STEPS):
                img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
                # -1 lets the batch dimension follow whatever next_batch returned.
                img_inputs = np.reshape(img_inputs, (-1, 28, 28, 1))
                _, loss_value, acc, summary = sess.run(
                    [train_step, loss, accuracy, summary_op],
                    feed_dict={inputs: img_inputs, labels: img_labels},
                )
                if i % 10 == 0:
                    print("After {} training steps, loss is {}, accuracy: {}".format(i, loss_value, acc))
                    saver.save(sess, checkpoint_path)
                summary_writer.add_summary(summary, i)
            # The periodic save only fires on multiples of 10; persist the
            # weights produced by the final steps as well.
            saver.save(sess, checkpoint_path)
        finally:
            # Flush and release the event file even if training is interrupted.
            summary_writer.close()

4 载入模型及预测

代码语言:txt
复制
def load_model_only_with_params():
    """Rebuild the network in a fresh graph, restore the latest checkpoint, and classify one test image.

    Loads MNIST from ``./mnist_data``, takes the first test image, restores
    the most recent checkpoint found under ``./conv_models`` and prints the
    predicted digit next to the true label.

    Raises:
        RuntimeError: If no checkpoint is found under ``./conv_models``.
    """
    g_params = tf.Graph()
    with g_params.as_default():
        inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
        # keep_prob=1.0 disables dropout: prediction must be deterministic,
        # whereas 0.5 would randomly zero half of the hidden activations.
        prediction = inference(inputs, 1.0)
        saver = tf.train.Saver()

    mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
    img = np.reshape(mnist.test.images[0], (1, 28, 28, 1))
    img_label = np.argmax(mnist.test.labels[0])

    ckpt = tf.train.get_checkpoint_state("./conv_models")
    if ckpt is None or not ckpt.model_checkpoint_path:
        raise RuntimeError("No checkpoint found in ./conv_models; train the model first.")

    with tf.Session(graph=g_params) as sess:
        saver.restore(sess, ckpt.model_checkpoint_path)
        pre = sess.run(prediction, feed_dict={inputs: img})
        # The logits are already a NumPy array; no extra graph op is needed.
        pre_num = np.argmax(pre, axis=1)
        print("prediction: {}, real: {}".format(pre_num[0], img_label))
代码语言:txt
复制
prediction: 7, real: 7

5 完整程序

代码语言:txt
复制
import tensorflow as tf 
from tensorflow.examples.tutorials.mnist import input_data
import os
import numpy as np


# Settings consumed by train(): initial learning rate and per-epoch decay
# factor for exponential_decay, and the decay of the variable moving average.
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
# NOTE(review): not referenced by any function visible in this file.
REGULARAZTION_RATE = 0.0001
# Fixed learning rate used by train_new().
LEARNING_RATE = 0.0001
TRAINING_STEPS = 30000
MOVING_AVERAGE_DECAY = 0.99
# Checkpoint directory / file name and the TensorBoard log directory.
MODEL_PATH = "./conv_models"
MODEL_NAME = "conv_model.ckpt"
LOG_DIR = "./logs"
BATCH_SIZE = 100
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(MODEL_PATH, exist_ok=True)

def conv2d(input_tensor, ksize, strides, pad, name_w, name_b):
    """Convolve ``input_tensor``, add a bias, and apply ReLU.

    Args:
        input_tensor: Tensor passed to ``tf.nn.conv2d``.
        ksize: Shape of the filter variable; its last entry sizes the bias.
        strides: Strides forwarded to ``tf.nn.conv2d``.
        pad: Padding mode forwarded to ``tf.nn.conv2d``.
        name_w: Variable name used for the filter.
        name_b: Variable name used for the bias.

    Returns:
        The ReLU-activated result of the biased convolution.
    """
    kernel_init = tf.truncated_normal_initializer(stddev=0.1)
    kernel = tf.get_variable(name=name_w, shape=ksize, dtype=tf.float32, initializer=kernel_init)
    offset_init = tf.constant_initializer(0.1)
    offset = tf.get_variable(name=name_b, shape=[ksize[-1]], dtype=tf.float32, initializer=offset_init)
    response = tf.nn.conv2d(input_tensor, kernel, strides=strides, padding=pad)
    return tf.nn.relu(response + offset)

def max_pooling(input_tensor, ksize, strides, pad):
    """Max-pool ``input_tensor`` with the given window, strides and padding.

    All arguments are forwarded unchanged to ``tf.nn.max_pool``; the pooled
    tensor is returned.
    """
    return tf.nn.max_pool(input_tensor, ksize=ksize, strides=strides, padding=pad)

def fullc(input_tensor, wsize, name_w, name_b):
    """Fully connected layer without activation: ``input_tensor @ W + b``.

    Args:
        input_tensor: Tensor multiplied by the weight matrix.
        wsize: Shape of the weight variable; its last entry sizes the bias.
        name_w: Variable name used for the weights.
        name_b: Variable name used for the bias.

    Returns:
        The affine transform of ``input_tensor`` (no non-linearity applied).
    """
    weight_init = tf.truncated_normal_initializer(stddev=0.1)
    matrix = tf.get_variable(name=name_w, shape=wsize, dtype=tf.float32, initializer=weight_init)
    offset_init = tf.constant_initializer(0.1)
    offset = tf.get_variable(name=name_b, shape=[wsize[-1]], dtype=tf.float32, initializer=offset_init)
    return tf.matmul(input_tensor, matrix) + offset

def inference(inputs, keep_prob):
    """Build the conv -> pool -> conv -> pool -> fc -> dropout -> fc graph.

    Args:
        inputs: 4-D image tensor whose last dimension is 1 (the first filter
            is declared with one input channel).
        keep_prob: Keep probability passed to ``tf.nn.dropout`` after the
            first fully connected layer.

    Returns:
        The output of the last fully connected layer (10 values per example,
        no softmax applied).
    """
    unit_stride = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]

    with tf.name_scope("conv_1"):
        features = conv2d(inputs, [5, 5, 1, 32], unit_stride, "SAME", "cw_1", "cb_1")
    with tf.name_scope("max_pool_1"):
        features = max_pooling(features, pool_window, pool_window, "SAME")
    with tf.name_scope("conv_2"):
        features = conv2d(features, [5, 5, 32, 64], unit_stride, "SAME", "cw_2", "cb_2")
    with tf.name_scope("max_pool_2"):
        features = max_pooling(features, pool_window, pool_window, "SAME")

    # Collapse height x width x channels into one axis for the dense layers.
    _, height, width, channels = features.get_shape().as_list()
    flat_size = height * width * channels
    flat = tf.reshape(features, [-1, flat_size])

    with tf.name_scope("fc_1"):
        hidden = fullc(flat, [flat_size, 512], "fw_1", "fb_1")
        hidden = tf.nn.dropout(hidden, keep_prob)
    with tf.name_scope("fc_2"):
        logits = fullc(hidden, [512, 10], "fw_2", "fb_2")
    return logits

def loss_cal(prediction, labels):
    """Mean softmax cross-entropy between logits and one-hot labels.

    Args:
        prediction: Logits tensor.
        labels: One-hot label tensor; converted to class indices via argmax
            along axis 1.

    Returns:
        Scalar loss tensor. A scalar summary named "loss" is also registered.
    """
    with tf.name_scope("loss"):
        class_ids = tf.argmax(labels, 1)
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=class_ids)
        mean_loss = tf.reduce_mean(per_example)
        tf.summary.scalar("loss", mean_loss)
    return mean_loss


def evaluation(logits, labels):
    """Fraction of examples whose arg-max logit matches the arg-max label.

    Args:
        logits: Network output tensor.
        labels: One-hot label tensor.

    Returns:
        Scalar accuracy tensor. A scalar summary named "accuracy" is also
        registered.
    """
    with tf.name_scope("accuracy"):
        predicted_ids = tf.argmax(logits, 1)
        true_ids = tf.argmax(labels, 1)
        hits = tf.cast(tf.equal(predicted_ids, true_ids), tf.float32)
        mean_hits = tf.reduce_mean(hits)
        tf.summary.scalar("accuracy", mean_hits)
    return mean_hits

def train(mnist):
    """Train the CNN with an exponentially decayed learning rate and variable moving averages.

    Each call to ``train_op`` performs one gradient-descent step (which also
    increments ``global_step``) and updates the moving averages of the
    trainable variables. Every 10th iteration the loss is printed and a
    checkpoint suffixed with the global step is written under ``MODEL_PATH``.

    Args:
        mnist: Dataset object providing ``train.next_batch(batch_size)`` and
            ``train.num_examples``.
    """
    inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
    labels = tf.placeholder(tf.float32, [None, 10], name="label-outputs")
    global_step = tf.Variable(0, trainable=False)
    prediction = inference(inputs, 0.5)

    # The averaging op must be built AFTER inference(): the model variables
    # only exist once inference() has run, so collecting
    # tf.trainable_variables() earlier yields an empty list and nothing would
    # be averaged.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=tf.argmax(labels, 1))
    loss = tf.reduce_mean(cross_entropy)
    # decay_steps is the number of batches per epoch, so the rate is
    # multiplied by LEARNING_RATE_DECAY once per pass over the training set.
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, mnist.train.num_examples/BATCH_SIZE, LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    # Group the optimizer step and the moving-average update into one op.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name="train")

    saver = tf.train.Saver()
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for i in range(TRAINING_STEPS):
            img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
            img_inputs = np.reshape(img_inputs, (BATCH_SIZE, 28, 28, 1))
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={inputs: img_inputs, labels: img_labels})

            if i % 10 == 0:
                print("After {} training steps, loss is {}".format(step, loss_value))
                saver.save(sess, os.path.join(MODEL_PATH, MODEL_NAME), global_step=global_step)

def train_new(mnist):
    """Train the CNN with plain gradient descent, logging summaries and saving checkpoints.

    Builds the graph (placeholders, ``inference``, loss, accuracy, merged
    summaries, optimizer), then runs ``TRAINING_STEPS`` mini-batch updates.
    Every 10th step the loss/accuracy are printed and a checkpoint is written
    to ``MODEL_PATH/MODEL_NAME``; a summary is written to ``LOG_DIR`` on every
    step.

    Args:
        mnist: Dataset object providing ``train.next_batch(batch_size)``.
    """
    inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
    labels = tf.placeholder(tf.float32, [None, 10], name="label-outputs")
    # Dropout keep probability is fixed at 0.5, so the reported accuracy is
    # measured with dropout active.
    prediction = inference(inputs, 0.5)
    loss = loss_cal(prediction, labels)
    accuracy = evaluation(prediction, labels)
    summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)
    saver = tf.train.Saver()
    checkpoint_path = os.path.join(MODEL_PATH, MODEL_NAME)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
        try:
            for i in range(TRAINING_STEPS):
                img_inputs, img_labels = mnist.train.next_batch(BATCH_SIZE)
                # -1 lets the batch dimension follow whatever next_batch returned.
                img_inputs = np.reshape(img_inputs, (-1, 28, 28, 1))
                _, loss_value, acc, summary = sess.run(
                    [train_step, loss, accuracy, summary_op],
                    feed_dict={inputs: img_inputs, labels: img_labels},
                )
                if i % 10 == 0:
                    print("After {} training steps, loss is {}, accuracy: {}".format(i, loss_value, acc))
                    saver.save(sess, checkpoint_path)
                summary_writer.add_summary(summary, i)
            # The periodic save only fires on multiples of 10; persist the
            # weights produced by the final steps as well.
            saver.save(sess, checkpoint_path)
        finally:
            # Flush and release the event file even if training is interrupted.
            summary_writer.close()

def load_model_only_with_params():
    """Rebuild the network in a fresh graph, restore the latest checkpoint, and classify one test image.

    Loads MNIST from ``./mnist_data``, takes the first test image, restores
    the most recent checkpoint found under ``./conv_models`` and prints the
    predicted digit next to the true label.

    Raises:
        RuntimeError: If no checkpoint is found under ``./conv_models``.
    """
    g_params = tf.Graph()
    with g_params.as_default():
        inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="img-inputs")
        # keep_prob=1.0 disables dropout: prediction must be deterministic,
        # whereas 0.5 would randomly zero half of the hidden activations.
        prediction = inference(inputs, 1.0)
        saver = tf.train.Saver()

    mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
    img = np.reshape(mnist.test.images[0], (1, 28, 28, 1))
    img_label = np.argmax(mnist.test.labels[0])

    ckpt = tf.train.get_checkpoint_state("./conv_models")
    if ckpt is None or not ckpt.model_checkpoint_path:
        raise RuntimeError("No checkpoint found in ./conv_models; train the model first.")

    with tf.Session(graph=g_params) as sess:
        saver.restore(sess, ckpt.model_checkpoint_path)
        pre = sess.run(prediction, feed_dict={inputs: img})
        # The logits are already a NumPy array; no extra graph op is needed.
        pre_num = np.argmax(pre, axis=1)
        print("prediction: {}, real: {}".format(pre_num[0], img_label))

def main(argv=None):
    """Load MNIST from ./mnist_data and train the model.

    Args:
        argv: Command-line arguments; unused.
    """
    mnist = input_data.read_data_sets("./mnist_data", one_hot=True)
    # Train the model.
    train_new(mnist)
    # To predict with a previously trained model instead, call:
    # load_model_only_with_params()

# Script entry point: tf.app.run() invokes the module-level main() function.
if __name__ == "__main__":
    tf.app.run()

6 训练结果

6.1 损失值

图6.1 损失值
图6.1 损失值

6.2 准确度

图6.2 评估精度
图6.2 评估精度

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 1 搭建卷积神经网络
    • 1.0 网络结构
      • 1.2 网络分析
      • 2 网络结构
        • 2.1 网络结构可视化
          • 2.2 网络结构-源
          • 3 训练及测试
            • 3.1 载入数据
              • 3.2 训练及保存模型
              • 4 载入模型及预测
              • 5 完整程序
              • 6 训练结果
                • 6.1 损失值
                  • 6.2 准确度
                  领券
                  问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档