cifar10.py文件包含以下函数,用于搭建模型
def _activation_summary(x):
def _variable_on_cpu(name, shape, initializer):
def _variable_with_weight_decay(name, shape, stddev, wd):
def distorted_inputs():
def inputs(eval_data):
def inference(images):
def loss(logits, labels):
def _add_loss_summaries(total_loss):
def train(total_loss, global_step):
def maybe_download_and_extract():
详细代码如下:
# Flags are set up before main() runs so that the arguments tf.app.run() needs can be passed in.
FLAGS = tf.app.flags.FLAGS
# Basic model parameters; the arguments are (name, default value, help text).
tf.app.flags.DEFINE_integer('batch_size', 128,
"""Number of images to process in a batch.""")
tf.app.flags.DEFINE_string('data_dir', '/tmp/cifar10_data',
"""Path to the CIFAR-10 data directory.""")
tf.app.flags.DEFINE_boolean('use_fp16', False,
"""Train the model using fp16.""")
# Global constants describing the CIFAR-10 data set (re-exported from cifar10_input).
IMAGE_SIZE = cifar10_input.IMAGE_SIZE
NUM_CLASSES = cifar10_input.NUM_CLASSES
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL
# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.1 # Initial learning rate.
# Name prefix that _activation_summary() strips from op names.
TOWER_NAME = 'tower'
# Archive downloaded by maybe_download_and_extract().
DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
tf.app.flags.DEFINE_integer() 等函数用于定义命令行可选参数,三个参数依次为(名称,默认值,帮助信息)
tf.app.flags.FLAGS 可以从对应的命令行参数中读取参数: 例如 batch_size= FLAGS.batch_size
def _activation_summary(x):
    """Attach TensorBoard summaries for the activations in ``x``.

    Emits a histogram of the values of ``x`` and a scalar holding the
    fraction of zero entries in ``x`` (a measure of sparsity).

    Args:
        x: Tensor to summarize.
    """
    # Strip any '<TOWER_NAME>_<digits>/' prefix from the op name before
    # using it as the summary tag.
    tower_prefix_pattern = '%s_[0-9]*/' % TOWER_NAME
    summary_tag = re.sub(tower_prefix_pattern, '', x.op.name)

    # Histogram of the activation values.
    tf.summary.histogram(summary_tag + '/activations', x)

    # Scalar with the proportion of zeros in x.
    zero_ratio = tf.nn.zero_fraction(x)
    tf.summary.scalar(summary_tag + '/sparsity', zero_ratio)
创建直方图及衡量x的稀疏性,并在tensorboard展现出来。
re.sub功能是对于一个输入的字符串,利用正则表达式,来实现字符串替换处理的功能,并返回处理后的字符串
def _variable_on_cpu(name, shape, initializer):
    """Get a variable through ``tf.get_variable`` under the '/cpu:0' device.

    Args:
        name: Name of the variable.
        shape: Shape of the variable.
        initializer: Initializer passed to ``tf.get_variable``.

    Returns:
        The variable returned by ``tf.get_variable``; its dtype is float16
        when FLAGS.use_fp16 is set and float32 otherwise.
    """
    if FLAGS.use_fp16:
        var_dtype = tf.float16
    else:
        var_dtype = tf.float32

    # Pin the variable to the CPU device (GPUs would be addressed as
    # '/gpu:0', '/gpu:1', ...).
    with tf.device('/cpu:0'):
        return tf.get_variable(
            name, shape, initializer=initializer, dtype=var_dtype)
tf.get_variable() 用来获取已存在的变量,如果该变量不存在,就新建一个。而且该函数会检查当前命名空间下是否存在同样名称的变量,可以方便共享变量。
def _variable_with_weight_decay(name, shape, stddev, wd):
    """Create a truncated-normal-initialized variable, optionally with L2 decay.

    Args:
        name: Name of the variable.
        shape: Shape of the variable.
        stddev: Standard deviation of the truncated normal initializer.
        wd: Weight-decay factor. When not None, ``l2_loss(var) * wd`` is
            added to the 'losses' collection; when None no decay term is
            added.

    Returns:
        The variable obtained from ``_variable_on_cpu``.
    """
    if FLAGS.use_fp16:
        param_dtype = tf.float16
    else:
        param_dtype = tf.float32

    # Initial values are drawn from a truncated normal distribution.
    init = tf.truncated_normal_initializer(stddev=stddev, dtype=param_dtype)
    var = _variable_on_cpu(name, shape, init)

    if wd is None:
        return var

    # Register the decay term in the 'losses' collection, which loss()
    # later sums into the total loss.
    decay_term = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', decay_term)
    return var
tf.nn.l2_loss(t, name=None):该函数计算张量 t 中所有元素平方和的一半,即 output = sum(t ** 2) / 2(注意不开平方根,因此并不是 L2 范数本身的一半),下面是从网上找来的例子,方便理解
import tensorflow as tf

# Small demonstration of tf.nn.l2_loss on a vector and on a matrix.
a = tf.constant([1, 2, 3], dtype=tf.float32)
b = tf.constant([[1, 1], [2, 2], [3, 3]], dtype=tf.float32)

# The with-block closes the session on exit, so no explicit sess.close()
# call is needed.
with tf.Session() as sess:
    print('a:')
    print(sess.run(tf.nn.l2_loss(a)))
    print('b:')
    print(sess.run(tf.nn.l2_loss(b)))
输出结果: a: 7.0 #(1^2+2^2+3^2)/2=(1+4+9)/2=7 b: 14.0 # 计算同上
def distorted_inputs():
    """Build the training input by delegating to ``cifar10_input.distorted_inputs``.

    The data is read from the 'cifar-10-batches-bin' sub-directory of
    FLAGS.data_dir using FLAGS.batch_size as the batch size.

    Returns:
        images, labels: the pair returned by
        ``cifar10_input.distorted_inputs``; both are cast to float16 when
        FLAGS.use_fp16 is set.

    Raises:
        ValueError: If FLAGS.data_dir is empty.
    """
    if not FLAGS.data_dir:
        raise ValueError('Please supply a data_dir')

    batches_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    image_batch, label_batch = cifar10_input.distorted_inputs(
        data_dir=batches_dir, batch_size=FLAGS.batch_size)

    if not FLAGS.use_fp16:
        return image_batch, label_batch

    image_batch = tf.cast(image_batch, tf.float16)
    label_batch = tf.cast(label_batch, tf.float16)
    return image_batch, label_batch
def inputs(eval_data):
    """Build the evaluation input by delegating to ``cifar10_input.inputs``.

    The data is read from the 'cifar-10-batches-bin' sub-directory of
    FLAGS.data_dir using FLAGS.batch_size as the batch size.

    Args:
        eval_data: Forwarded unchanged to ``cifar10_input.inputs``
            (presumably selects the evaluation split; confirm in
            cifar10_input).

    Returns:
        images, labels: the pair returned by ``cifar10_input.inputs``; both
        are cast to float16 when FLAGS.use_fp16 is set.

    Raises:
        ValueError: If FLAGS.data_dir is empty.
    """
    if not FLAGS.data_dir:
        raise ValueError('Please supply a data_dir')

    batches_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    image_batch, label_batch = cifar10_input.inputs(
        eval_data=eval_data,
        data_dir=batches_dir,
        batch_size=FLAGS.batch_size)

    if not FLAGS.use_fp16:
        return image_batch, label_batch

    image_batch = tf.cast(image_batch, tf.float16)
    label_batch = tf.cast(label_batch, tf.float16)
    return image_batch, label_batch
distorted_inputs ( ) 和inputs( ) 在cifar10_input.py文件中有详细定义
def inference(images):
    """Build the CIFAR-10 model graph.

    Layer order: conv1 -> pool1 -> norm1 -> conv2 -> norm2 -> pool2 ->
    local3 -> local4 -> softmax_linear.

    Args:
        images: Image batch tensor. Its first dimension must be statically
            known, because it is used to flatten the pool2 output. The
            conv1 kernel expects 3 input channels.

    Returns:
        The output of the 'softmax_linear' layer, whose last dimension is
        NUM_CLASSES. No softmax is applied here.
    """
    # conv1: 5x5 convolution, 3 -> 64 channels, then ReLU.
    with tf.variable_scope('conv1') as scope:  # Scope for the layer's variables.
        conv1_kernel = _variable_with_weight_decay(
            'weights', shape=[5, 5, 3, 64], stddev=5e-2, wd=None)
        conv1_raw = tf.nn.conv2d(
            images, conv1_kernel, [1, 1, 1, 1], padding='SAME')
        conv1_bias = _variable_on_cpu(
            'biases', [64], tf.constant_initializer(0.0))
        conv1_pre = tf.nn.bias_add(conv1_raw, conv1_bias)
        conv1 = tf.nn.relu(conv1_pre, name=scope.name)
        _activation_summary(conv1)

    # pool1: 3x3 max pooling with stride 2.
    pool1 = tf.nn.max_pool(
        conv1,
        ksize=[1, 3, 3, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
        name='pool1')

    # norm1: local response normalization.
    norm1 = tf.nn.lrn(
        pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')

    # conv2: 5x5 convolution, 64 -> 64 channels, then ReLU.
    with tf.variable_scope('conv2') as scope:
        conv2_kernel = _variable_with_weight_decay(
            'weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=None)
        conv2_raw = tf.nn.conv2d(
            norm1, conv2_kernel, [1, 1, 1, 1], padding='SAME')
        conv2_bias = _variable_on_cpu(
            'biases', [64], tf.constant_initializer(0.1))
        conv2_pre = tf.nn.bias_add(conv2_raw, conv2_bias)
        conv2 = tf.nn.relu(conv2_pre, name=scope.name)
        _activation_summary(conv2)

    # norm2: local response normalization (applied before pooling here).
    norm2 = tf.nn.lrn(
        conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')

    # pool2: 3x3 max pooling with stride 2.
    pool2 = tf.nn.max_pool(
        norm2,
        ksize=[1, 3, 3, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
        name='pool2')

    # local3: fully connected layer with 384 units and ReLU.
    with tf.variable_scope('local3') as scope:
        # Flatten everything but the batch dimension so a single matrix
        # multiply can be used.
        batch_dim = images.get_shape().as_list()[0]
        flattened = tf.reshape(pool2, [batch_dim, -1])
        flat_width = flattened.get_shape()[1].value
        local3_weights = _variable_with_weight_decay(
            'weights', shape=[flat_width, 384], stddev=0.04, wd=0.004)
        local3_bias = _variable_on_cpu(
            'biases', [384], tf.constant_initializer(0.1))
        local3 = tf.nn.relu(
            tf.matmul(flattened, local3_weights) + local3_bias,
            name=scope.name)
        _activation_summary(local3)

    # local4: fully connected layer with 192 units and ReLU.
    with tf.variable_scope('local4') as scope:
        local4_weights = _variable_with_weight_decay(
            'weights', shape=[384, 192], stddev=0.04, wd=0.004)
        local4_bias = _variable_on_cpu(
            'biases', [192], tf.constant_initializer(0.1))
        local4 = tf.nn.relu(
            tf.matmul(local3, local4_weights) + local4_bias,
            name=scope.name)
        _activation_summary(local4)

    # softmax_linear: linear layer (WX + b) producing one value per class.
    with tf.variable_scope('softmax_linear') as scope:
        out_weights = _variable_with_weight_decay(
            'weights', [192, NUM_CLASSES], stddev=1/192.0, wd=None)
        out_bias = _variable_on_cpu(
            'biases', [NUM_CLASSES], tf.constant_initializer(0.0))
        softmax_linear = tf.add(
            tf.matmul(local4, out_weights), out_bias, name=scope.name)
        _activation_summary(softmax_linear)

    return softmax_linear
.get_shape()函数返回的是 TensorShape 对象(而不是普通元组),需要通过as_list()的操作转换成list;sess.run()里面只能运行operation和tensor,不能运行.get_shape()的返回值,否则会报错
def loss(logits, labels):
    """Add the mean cross-entropy to the 'losses' collection and sum the collection.

    Args:
        logits: Logits tensor, e.g. the output of inference().
        labels: Class-index labels (not one-hot); cast to int64 before use.

    Returns:
        A tensor named 'total_loss': the sum of every entry in the 'losses'
        collection, which includes the cross-entropy mean added here and
        any weight-decay terms registered earlier.
    """
    # The sparse cross-entropy op takes integer class indices.
    int_labels = tf.cast(labels, tf.int64)
    per_example_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=int_labels,
        logits=logits,
        name='cross_entropy_per_example')
    mean_xent = tf.reduce_mean(per_example_xent, name='cross_entropy')
    tf.add_to_collection('losses', mean_xent)

    collected = tf.get_collection('losses')
    return tf.add_n(collected, name='total_loss')
tf.nn.sparse_softmax_cross_entropy_with_logits()将softmax层与交叉熵函数cross_entropy进行封装,放在一起计算,直接使用标签数据,而非one-hot编码。
def _add_loss_summaries(total_loss):
    """Add scalar summaries and moving averages for all losses.

    Covers every tensor in the 'losses' collection plus ``total_loss``.

    Args:
        total_loss: Total loss tensor, e.g. from loss().

    Returns:
        The op that updates the moving averages of the losses.
    """
    # Exponential moving average (decay 0.9) over the individual losses
    # and the total loss.
    ema = tf.train.ExponentialMovingAverage(0.9, name='avg')
    tracked = tf.get_collection('losses') + [total_loss]
    update_op = ema.apply(tracked)

    for loss_tensor in tracked:
        tag = loss_tensor.op.name
        # The unaveraged value gets the ' (raw)' suffix; the moving
        # average is logged under the original loss name.
        tf.summary.scalar(tag + ' (raw)', loss_tensor)
        tf.summary.scalar(tag, ema.average(loss_tensor))

    return update_op
_add_loss_summaries()函数为各单项损失和总损失添加标量摘要,并对它们(包括总损失)计算指数移动平均,返回更新移动平均的操作
def train(total_loss, global_step):
    """Build the training op for the CIFAR-10 model.

    Creates a gradient-descent optimizer with a staircase exponentially
    decayed learning rate, applies it to all trainable variables, and
    maintains moving averages of all trainable variables.

    Args:
        total_loss: Total loss from loss().
        global_step: Integer variable holding the number of training steps.

    Returns:
        train_op: op for training. Running it applies the gradients and
        then updates the moving averages of the trainable variables.
    """
    # Variables that affect the learning rate; int() truncates decay_steps
    # to a whole number of steps.
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    # Decay the learning rate in discrete steps (staircase=True).
    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.summary.scalar('learning_rate', lr)

    # Moving averages of all losses and their associated summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # Compute gradients only after the loss averages have been updated.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)

    # Apply the computed gradients; this also increments global_step.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Histograms for the trainable variables.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)

    # Histograms for the gradients.
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    with tf.control_dependencies([apply_gradient_op]):
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

    return variables_averages_op
# 下载并解压数据
def maybe_download_and_extract():
    """Download the archive at DATA_URL and extract it under FLAGS.data_dir.

    The download is skipped when the archive file already exists in
    FLAGS.data_dir, and the extraction is skipped when the
    'cifar-10-batches-bin' directory already exists there.
    """
    dest_directory = FLAGS.data_dir
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)

    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # Rewrite the same console line with the current percentage.
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
                float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()
        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')

    extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
    if not os.path.exists(extracted_dir_path):
        # Use a context manager so the archive handle is closed even if
        # extraction fails.
        with tarfile.open(filepath, 'r:gz') as archive:
            archive.extractall(dest_directory)
为了提高效率,建议先将数据提前下载好。
TensorFlow 中 tf.app.flags.FLAGS 的用法介绍:https://blog.csdn.net/lyc_yongcai/article/details/73456960
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。