The lineage of the CNN (convolutional neural network) traces back to the backpropagation algorithm popularized in 1986; in 1989 LeCun applied it to multi-layer networks, and in 1998 his LeNet-5 completed the prototype of the modern CNN. LeNet-5 was the first classic CNN architecture, but our subject here is AlexNet, the network introduced in the paper "ImageNet Classification with Deep Convolutional Neural Networks". Proposed by Alex Krizhevsky et al. in 2012, AlexNet won ILSVRC-2012 by a wide margin, igniting a wave of interest in neural networks and establishing the CNN as the core model for image classification. The paper describes a deep convolutional network whose goal is to classify 1.2 million high-resolution images into 1000 classes.
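Below is a TensorFlow 1.x style implementation of the AlexNet graph, in the spirit of the widely circulated bvlc_alexnet fine-tuning code. It depends on small layer wrappers (conv, lrn, max_pool, fc, dropout), a sketch of which follows the class.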
import numpy as np
import tensorflow as tf


class AlexNet(object):

    def __init__(self, x, keep_prob, num_classes, skip_layer,
                 weights_path='DEFAULT'):
        """
        Create the graph of the AlexNet model.

        Args:
            x: Placeholder for the input tensor.
            keep_prob: Keep probability for the dropout layers.
            num_classes: Number of classes in the dataset.
            skip_layer: List of names of the layers that will be trained
                from scratch (their pretrained weights are not loaded).
            weights_path: Complete path to the pretrained weight file, if it
                isn't in the same folder as this code.
        """
        # Parse input arguments into class variables
        self.X = x
        self.NUM_CLASSES = num_classes
        self.KEEP_PROB = keep_prob
        self.SKIP_LAYER = skip_layer
        if weights_path == 'DEFAULT':
            self.WEIGHTS_PATH = 'bvlc_alexnet.npy'
        else:
            self.WEIGHTS_PATH = weights_path

        # Call the create function to build the computational graph of AlexNet
        self.create()
    def create(self):
        # 1st Layer: Conv (w ReLU) -> LRN -> Pool
        conv1 = conv(self.X, 11, 11, 96, 4, 4, padding='VALID', name='conv1')
        norm1 = lrn(conv1, 2, 1e-04, 0.75, name='norm1')
        pool1 = max_pool(norm1, 3, 3, 2, 2, padding='VALID', name='pool1')

        # 2nd Layer: Conv (w ReLU) -> LRN -> Pool, split into two groups
        conv2 = conv(pool1, 5, 5, 256, 1, 1, groups=2, name='conv2')
        norm2 = lrn(conv2, 2, 1e-04, 0.75, name='norm2')
        pool2 = max_pool(norm2, 3, 3, 2, 2, padding='VALID', name='pool2')

        # 3rd Layer: Conv (w ReLU)
        conv3 = conv(pool2, 3, 3, 384, 1, 1, name='conv3')

        # 4th Layer: Conv (w ReLU), split into two groups
        conv4 = conv(conv3, 3, 3, 384, 1, 1, groups=2, name='conv4')

        # 5th Layer: Conv (w ReLU) -> Pool, split into two groups
        conv5 = conv(conv4, 3, 3, 256, 1, 1, groups=2, name='conv5')
        pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID', name='pool5')

        # 6th Layer: Flatten -> FC (w ReLU) -> Dropout. With a 227x227x3
        # input, the spatial size shrinks 227 -> 55 (conv1) -> 27 (pool1)
        # -> 13 (pool2) -> 6 (pool5), leaving a 6x6x256 volume to flatten.
        flattened = tf.reshape(pool5, [-1, 6*6*256])
        fc6 = fc(flattened, 6*6*256, 4096, name='fc6')
        dropout6 = dropout(fc6, self.KEEP_PROB)

        # 7th Layer: FC (w ReLU) -> Dropout
        fc7 = fc(dropout6, 4096, 4096, name='fc7')
        dropout7 = dropout(fc7, self.KEEP_PROB)

        # 8th Layer: FC and return unscaled activations (logits)
        self.fc8 = fc(dropout7, 4096, self.NUM_CLASSES, relu=False, name='fc8')
    def load_initial_weights(self, session):
        # Load the weights into memory. allow_pickle=True is needed on
        # newer NumPy versions because the file stores a pickled dict of
        # layer name -> [weights, biases].
        weights_dict = np.load(self.WEIGHTS_PATH, encoding='bytes',
                               allow_pickle=True).item()

        # Loop over all layer names stored in the weights dict
        for op_name in weights_dict:

            # Check if the layer should be trained from scratch instead
            if op_name not in self.SKIP_LAYER:

                with tf.variable_scope(op_name, reuse=True):

                    # Assign weights/biases to their corresponding tf variable
                    for data in weights_dict[op_name]:

                        # Biases are rank-1 arrays; everything else is weights
                        if len(data.shape) == 1:
                            var = tf.get_variable('biases', trainable=False)
                            session.run(var.assign(data))
                        else:
                            var = tf.get_variable('weights', trainable=False)
                            session.run(var.assign(data))
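The class above assumes layer wrappers conv, lrn, max_pool, fc, and dropout that are not defined in the listing. The following is a minimal sketch of what they could look like, inferred from the call signatures used in create() (filter height/width, number of filters, then strides per axis); the groups argument reproduces AlexNet's original two-GPU split by convolving channel groups separately. Treat these as assumptions in the style of the common bvlc_alexnet fine-tuning code, not the author's exact helpers.

def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x,
         name, padding='SAME', groups=1):
    """Convolution wrapper with ReLU and optional filter groups."""
    input_channels = int(x.get_shape()[-1])

    # Convolution closure for a single group
    convolve = lambda i, k: tf.nn.conv2d(
        i, k, strides=[1, stride_y, stride_x, 1], padding=padding)

    with tf.variable_scope(name) as scope:
        weights = tf.get_variable('weights',
                                  shape=[filter_height, filter_width,
                                         input_channels // groups,
                                         num_filters])
        biases = tf.get_variable('biases', shape=[num_filters])

        if groups == 1:
            out = convolve(x, weights)
        else:
            # Split input and kernels along the channel axis, convolve each
            # group separately, then stitch the outputs back together
            input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x)
            weight_groups = tf.split(axis=3, num_or_size_splits=groups,
                                     value=weights)
            output_groups = [convolve(i, k)
                             for i, k in zip(input_groups, weight_groups)]
            out = tf.concat(axis=3, values=output_groups)

        return tf.nn.relu(tf.nn.bias_add(out, biases), name=scope.name)


def fc(x, num_in, num_out, name, relu=True):
    """Fully connected layer with optional ReLU."""
    with tf.variable_scope(name) as scope:
        weights = tf.get_variable('weights', shape=[num_in, num_out])
        biases = tf.get_variable('biases', shape=[num_out])
        act = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)
        return tf.nn.relu(act) if relu else act


def max_pool(x, filter_height, filter_width, stride_y, stride_x, name,
             padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1],
                          strides=[1, stride_y, stride_x, 1],
                          padding=padding, name=name)


def lrn(x, radius, alpha, beta, name, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius=radius,
                                              alpha=alpha, beta=beta,
                                              bias=bias, name=name)


def dropout(x, keep_prob):
    return tf.nn.dropout(x, keep_prob)

Assuming bvlc_alexnet.npy sits next to the script, building and running the graph then looks like this (the random input is purely illustrative):

# Hypothetical usage: build the graph, load pretrained weights, run inference
x = tf.placeholder(tf.float32, [None, 227, 227, 3])
keep_prob = tf.placeholder(tf.float32)
model = AlexNet(x, keep_prob, num_classes=1000, skip_layer=[])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    model.load_initial_weights(sess)
    image = np.random.rand(1, 227, 227, 3).astype(np.float32)  # dummy input
    scores = sess.run(model.fc8, feed_dict={x: image, keep_prob: 1.0})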