我正在尝试保存一个训练好的 CNN,并在之后恢复它用于评估。在 Stack Overflow 上可以找到很多保存和恢复 CNN 的例子。
然而,在我的例子中,保存时 CNN 的平均推理误差与恢复后 CNN 的平均推理误差并不完全相同。
这是训练和保存的代码。
import tensorflow as tf
import network as net
...
# File names used when saving: the checkpoint prefix handed to saver.save, the
# checkpoint-state file (passed to saver.save as latest_filename) and the
# serialized graph definition written by tf.train.write_graph.
checkpoint_prefix = os.path.join(path_to_save, "saved_checkpoint")
checkpoint_state_name = "checkpoint_state"
input_graph_name = "input_graph.pb"
# NOTE(review): output_graph_name is not referenced in the visible code.
output_graph_name = "output_graph.pb"
# Training input pipeline: shuffled mini-batches produced by 8 queue threads.
image, label = _get_image_and_label(learning_params)
image_batch, label_batch = tf.train.shuffle_batch([image, label], batch_size=mini_batch_size, num_threads=8,
capacity=min_queue_examples_train + 3*mini_batch_size,
min_after_dequeue=min_queue_examples_train)
label_batch = tf.reshape(label_batch, [mini_batch_size, label_dim])
# Test input pipeline: un-shuffled mini-batches read by a single thread.
# NOTE(review): presumably one thread is used to keep the example order
# deterministic -- confirm.
image_, label_ = _get_image_and_label_eval(learning_params)
image_batch_, label_batch_ = tf.train.batch([image_, label_], batch_size=mini_batch_size, num_threads=1,
capacity=min_queue_examples_test)
label_batch_ = tf.reshape(label_batch_, [mini_batch_size, label_dim])
# Wrap every batch tensor in placeholder_with_default: each one yields the
# queue output unless a value is explicitly fed for it.
image_batch = tf.placeholder_with_default(image_batch, shape=
[mini_batch_size, height, width, depth])
label_batch = tf.placeholder_with_default(label_batch, shape=
[mini_batch_size, label_dim])
image_batch_ = tf.placeholder_with_default(image_batch_, shape=
[mini_batch_size, height, width, depth])
label_batch_ = tf.placeholder_with_default(label_batch_, shape=
[mini_batch_size, label_dim])
# Build the CNN graph. net.CNN() takes no arguments here; the session loop
# feeds data through net.X / net.Y rather than wiring the batch tensors in.
cost, train_op, regression = net.CNN()
# max_to_keep=1: only the most recent checkpoint is kept on disk.
saver = tf.train.Saver(max_to_keep=1)
# Train the network, evaluate it after every epoch, and checkpoint it whenever
# the mean training cost improves on the best value seen so far.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # .... (code elided in the original post; presumably this is where
        # cost_buffer and min_Cost are initialised -- they are used below but
        # not defined in the visible code)
        tf.train.write_graph(sess.graph_def, path_to_save, input_graph_name, as_text=False)
        for epoch in range(num_epochs):
            # # TRAINING
            for step in range(num_batches_train):
                # Pull one mini-batch out of the input queue as numpy arrays,
                # then feed it to the network's own placeholders.
                X, Y = sess.run([image_batch, label_batch])
                cost_value, _ = sess.run([cost, train_op], feed_dict={net.X: X, net.Y: Y})
                cost_buffer.append(cost_value)
            # # EVALUATION
            for step in range(num_batches_test):
                X, Y = sess.run([image_batch_, label_batch_])
                inference = sess.run(regression, feed_dict={net.X: X})
                # NOTE(review): `error` is overwritten on every iteration; the
                # visible code never accumulates it into a mean.
                error = abs(inference - Y)
            # # SAVE TRAINED NETWORK WHEN THE BEST PERFORMER APPEARS
            # NOTE(review): cost_buffer is not reset per epoch in the visible
            # code, so this is a running mean over all epochs so far.
            mean_cost = np.mean(cost_buffer)
            if min_Cost > mean_cost:
                min_Cost = mean_cost
                # global_step=0 makes every save overwrite "saved_checkpoint-0".
                checkpoint_path = saver.save(sess, checkpoint_prefix, global_step=0, latest_filename=checkpoint_state_name)
    finally:
        # Always stop and join the queue-runner threads, even when a training
        # or evaluation step raises; the `with` block then closes the session.
        coord.request_stop()
        coord.join(threads)
这是用于恢复和评估的代码。
import tensorflow as tf
...
# Evaluation input pipeline: un-shuffled mini-batches read by a single thread.
image, label = _get_image_and_label_eval()
image_batch, label_batch = tf.train.batch([image, label], batch_size=mini_batch_size, num_threads=1, capacity=min_queue_examples)
label_batch = tf.reshape(label_batch, [mini_batch_size, 3])
# Restore the trained network from its checkpoint and run it on the test set.
with tf.Session() as sess:
    # Step1) Initialize global variables
    # NOTE(review): this runs before the meta graph is imported, so it only
    # covers variables that exist in the graph at this point; the network's
    # variables get their values from saver.restore below.
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Step2) Load graph from meta file (meta file contains the graph that I had defined before)
        saver = tf.train.import_meta_graph(os.path.join(path_to_saved_network, 'saved_checkpoint-0.meta'))
        # Step3) Restore all the weights values
        # NOTE(review): the .meta file is read from path_to_saved_network but
        # the checkpoint from net_dir -- confirm both point at the same
        # directory, otherwise graph and weights may come from different runs.
        saver.restore(sess, os.path.join(net_dir, 'saved_checkpoint-0'))
        print('Trained Deep Network is restored')
        # Step4) Recall placeholder and operation
        graph = tf.get_default_graph()
        x = graph.get_tensor_by_name("input_node:0")
        r = graph.get_tensor_by_name("output_node:0")
        # # EVALUATION
        for step in range(num_batches):
            X, Y = sess.run([image_batch, label_batch])
            inference = sess.run(r, feed_dict={x: X})
            # NOTE(review): `error` is overwritten on every iteration; the
            # visible code never accumulates it into a mean.
            error = abs(inference - Y)
    finally:
        # Always stop and join the queue-runner threads, even when restoring
        # or inference raises; the `with` block then closes the session.
        coord.request_stop()
        coord.join(threads)
简而言之,保存代码中 error = abs(inference - Y) 的平均值与恢复代码中 error = abs(inference - Y) 的平均值不同。
我已经验证了在保存和恢复过程中用于推理的输入图像是相同的。
另外,我没有使用 `tf.train.ExponentialMovingAverage` 函数。
发布于 2018-06-26 09:32:18
我没有读过你的代码,对你来说看我的发现可能已经太晚了。但我也遇到了同样的问题。如果保存和恢复模型,则预测是垃圾。
我用的是 tensorflow 1.8。它在 python3.6 下可以正常工作,但在 python3.5 下不行。
https://stackoverflow.com/questions/-100001681
复制相似问题