公众号后台回复“python”,立刻领取100本机器学习必备Python电子书
GoogLeNet Inception V3在《Rethinking the Inception Architecture for Computer Vision》中提出(注意,在这篇论文中作者把该网络结构叫做v2版,我们以最终的v4版论文的划分为标准),该论文的亮点在于:
网络结构设计的准则
前面也说过,深度学习网络的探索更多是个实验科学,在实验中人们总结出一些结构设计准则,但说实话我觉得不一定都有实操性:
以及一个n×n卷积核通过顺序相连的两个1×n和n×1做降维(有点像矩阵分解),如果n=3,计算性能可以提升1-(3+3)/9≈33%,但如果考虑高性能计算,这种分解可能会造成L1 cache miss率上升。
平滑样本标注
对于多分类的样本标注一般是one-hot的,例如[0,0,0,1],使用类似交叉熵的损失函数会使得模型在学习中对ground truth标签分配过于置信的概率,并且由于ground truth标签的logit值与其他标签差距过大,导致出现过拟合,降低了泛化性。一种解决方法是加正则项,即对样本标签按某个概率分布做调节,使得样本标注变成“soft”的,例如[0.1,0.2,0.1,0.6],这种方式在实验中降低了top-1和top-5的错误率0.2%。
网络结构
代码实践
为了能在单机跑起来,对feature map做了缩减,为适应cifar10的输入大小,对输入的stride做了调整,代码如下。
# -*- coding: utf-8 -*-
# Imports, training callbacks and global configuration for a reduced
# GoogLeNet Inception V3 trained on CIFAR-10 with Keras (TensorFlow backend).
import numpy as np
# NOTE(review): `merge` is imported twice on this line and never used below;
# the functional-API `concatenate`/`add` imported further down are what this
# file actually uses.
from keras.layers import Input, merge, Dropout, Dense, Lambda, Flatten, Activation, merge
from keras.layers.convolutional import MaxPooling2D, Conv2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import concatenate, add
from keras.regularizers import l1_l2
from keras.models import Model
from keras.callbacks import CSVLogger, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
# Multiply the learning rate by sqrt(0.5) after 3 epochs without val_loss
# improvement, down to a floor of 1e-6.
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.5), cooldown=0, patience=3, min_lr=1e-6)
# Stop training once val_acc has not improved by at least 0.0005 for 15 epochs.
early_stopper = EarlyStopping(monitor='val_acc', min_delta=0.0005, patience=15)
# NOTE(review): the log file name says "resnet34" but this script builds
# Inception V3 -- presumably copied from another experiment; confirm intent.
csv_logger = CSVLogger('resnet34_cifar10.csv')
from keras.utils.vis_utils import plot_model
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.datasets import cifar10
from keras import backend as K
import tensorflow as tf
# Compatibility shim: old Keras releases expect tf.python.control_flow_ops
# to exist on newer TensorFlow versions.
tf.python.control_flow_ops = tf
import warnings
warnings.filterwarnings('ignore')
# Every filter count below is divided by this factor so the (normally very
# large) network can be trained on a single machine.
filter_control = 8
def bn_relu(input):
    """Apply batch normalization followed by a ReLU activation.

    Parameters:
        input: Keras tensor to normalize and activate.

    Returns:
        The batch-normalized, ReLU-activated tensor.
    """
    return Activation("relu")(BatchNormalization()(input))
def before_inception(input_shape, small_mode=False):
    """Stem network applied before the Inception blocks.

    A stack of plain 3x3 convolutions plus one max pooling that reduces
    the input image to the feature map consumed by inception_A.

    Parameters:
        input_shape: input Keras tensor (despite the name, this is a
            tensor, not a shape tuple).
        small_mode: when True, use stride 1 at every normally-strided
            stage so small inputs (e.g. CIFAR-10 32x32) are not
            downsampled too aggressively.

    Returns:
        The stem's output tensor.
    """
    input_layer = input_shape
    # Small inputs keep full resolution; large inputs are halved at each
    # strided stage.
    if small_mode:
        strides = (1, 1)
    else:
        strides = (2, 2)
    before_conv1_3x3 = Conv2D(name="before_conv1_3x3/2",
                              filters=32 // filter_control,
                              kernel_size=(3, 3),
                              strides=strides,
                              kernel_initializer='he_normal',
                              activation='relu',
                              kernel_regularizer=l1_l2(0.00001))(input_layer)
    before_conv2_3x3 = Conv2D(name="before_conv2_3x3/1",
                              filters=32 // filter_control,
                              kernel_size=(3, 3),
                              strides=(1, 1),
                              kernel_initializer='he_normal',
                              activation='relu',
                              kernel_regularizer=l1_l2(0.00001))(before_conv1_3x3)
    before_conv3_3x3 = Conv2D(name="before_conv3_3x3/1",
                              filters=64 // filter_control,
                              kernel_size=(3, 3),
                              strides=(1, 1),
                              kernel_initializer='he_normal',
                              activation='relu',
                              padding='same',
                              kernel_regularizer=l1_l2(0.00001))(before_conv2_3x3)
    before_pool1_3x3 = MaxPooling2D(name="before_pool1_3x3/2",
                                    pool_size=(3, 3),
                                    strides=strides,
                                    padding='valid')(before_conv3_3x3)
    before_conv4_3x3 = Conv2D(name="before_conv4_3x3/1",
                              filters=80 // filter_control,
                              kernel_size=(3, 3),
                              strides=(1, 1),
                              kernel_initializer='he_normal',
                              activation='relu',
                              padding='valid',
                              kernel_regularizer=l1_l2(0.00001))(before_pool1_3x3)
    # BUG FIX: this layer was mis-named "before_conv3_3x3/2" (copy-paste
    # from the conv3 layer above); renamed to match its variable so layer
    # names stay consistent and unambiguous in summaries/checkpoints.
    before_conv5_3x3 = Conv2D(name="before_conv5_3x3/2",
                              filters=192 // filter_control,
                              kernel_size=(3, 3),
                              strides=strides,
                              kernel_initializer='he_normal',
                              activation='relu',
                              padding='valid',
                              kernel_regularizer=l1_l2(0.00001))(before_conv4_3x3)
    before_conv6_3x3 = Conv2D(name="before_conv6_3x3/1",
                              filters=288 // filter_control,
                              kernel_size=(3, 3),
                              strides=(1, 1),
                              kernel_initializer='he_normal',
                              activation='relu',
                              padding='valid',
                              kernel_regularizer=l1_l2(0.00001))(before_conv5_3x3)
    return before_conv6_3x3
def inception_A(i, input_shape):
    """Inception-A block: four parallel branches concatenated, then BN+ReLU.

    Branches: (1x1 -> 3x3 -> 3x3), (1x1 -> 3x3), (avg-pool -> 1x1), and a
    plain 1x1 convolution.

    Parameters:
        i: string suffix appended to every layer name so repeated blocks
            get unique names.
        input_shape: input Keras tensor (a tensor, not a shape tuple).

    Returns:
        Output tensor of the block.
    """
    input_layer = input_shape

    # All Inception-A convolutions share stride 1, 'same' padding,
    # He-normal init, ReLU and the same L1/L2 weight penalty.
    def conv(name, source, filters, kernel_size):
        return Conv2D(name=name,
                      filters=filters // filter_control,
                      kernel_size=kernel_size,
                      strides=(1, 1),
                      kernel_initializer='he_normal',
                      activation='relu',
                      padding='same',
                      kernel_regularizer=l1_l2(0.00001))(source)

    # Branch 1: 1x1 -> 3x3 -> 3x3 (factorized 5x5).
    branch1 = conv("inception_A_conv1_1x1/1" + i, input_layer, 64, (1, 1))
    branch1 = conv("inception_A_conv2_3x3/1" + i, branch1, 96, (3, 3))
    branch1 = conv("inception_A_conv3_3x3/1" + i, branch1, 96, (3, 3))
    # Branch 2: 1x1 -> 3x3.
    branch2 = conv("inception_A_conv4_1x1/1" + i, input_layer, 48, (1, 1))
    branch2 = conv("inception_A_conv5_3x3/1" + i, branch2, 64, (3, 3))
    # Branch 3: average pool -> 1x1.
    pooled = AveragePooling2D(name="inception_A_pool1_3x3/1" + i,
                              pool_size=(3, 3),
                              strides=(1, 1),
                              padding='same')(input_layer)
    branch3 = conv("inception_A_conv6_1x1/1" + i, pooled, 64, (1, 1))
    # Branch 4: plain 1x1.
    branch4 = conv("inception_A_conv7_1x1/1" + i, input_layer, 64, (1, 1))
    merged = concatenate([branch1, branch2, branch3, branch4])
    return bn_relu(merged)
def inception_B(i, input_shape):
    """Inception-B block: 7x7 convolutions factorized into 1x7 and 7x1.

    Four parallel branches -- a double factorized-7x7 stack, a single
    factorized-7x7 stack, an average-pool branch and a plain 1x1 -- are
    concatenated and passed through BN+ReLU.

    Parameters:
        i: string suffix appended to every layer name so repeated blocks
            get unique names.
        input_shape: input Keras tensor (a tensor, not a shape tuple).

    Returns:
        Output tensor of the block.
    """
    input_layer = input_shape
    # Branch 1: 1x1 -> (1x7 -> 7x1) -> (1x7 -> 7x1).
    inception_B_conv1_1x1 = Conv2D(name="inception_B_conv1_1x1/1" + i,
                                   filters=128 // filter_control,
                                   kernel_size=(1, 1),
                                   strides=(1, 1),
                                   kernel_initializer='he_normal',
                                   activation='relu',
                                   padding='same',
                                   kernel_regularizer=l1_l2(0.00001))(input_layer)
    # BUG FIX: this layer was named "inception_A_conv2_3x3/1" + i (copy-paste
    # from inception_A), which both mislabels the op and collides with the
    # identically named layers created by inception_A for i in {0,1,2} --
    # duplicate layer names in a single Model. Renamed to match its variable.
    inception_B_conv2_1x7 = Conv2D(name="inception_B_conv2_1x7/1" + i,
                                   filters=128 // filter_control,
                                   kernel_size=(1, 7),
                                   strides=(1, 1),
                                   kernel_initializer='he_normal',
                                   activation='relu',
                                   padding='same',
                                   kernel_regularizer=l1_l2(0.00001))(inception_B_conv1_1x1)
    inception_B_conv3_7x1 = Conv2D(name="inception_B_conv3_7x1/1" + i,
                                   filters=128 // filter_control,
                                   kernel_size=(7, 1),
                                   strides=(1, 1),
                                   kernel_initializer='he_normal',
                                   activation='relu',
                                   padding='same',
                                   kernel_regularizer=l1_l2(0.00001))(inception_B_conv2_1x7)
    inception_B_conv4_1x7 = Conv2D(name="inception_B_conv4_1x7/1" + i,
                                   filters=128 // filter_control,
                                   kernel_size=(1, 7),
                                   strides=(1, 1),
                                   kernel_initializer='he_normal',
                                   activation='relu',
                                   padding='same',
                                   kernel_regularizer=l1_l2(0.00001))(inception_B_conv3_7x1)
    inception_B_conv5_7x1 = Conv2D(name="inception_B_conv5_7x1/1" + i,
                                   filters=192 // filter_control,
                                   kernel_size=(7, 1),
                                   strides=(1, 1),
                                   kernel_initializer='he_normal',
                                   activation='relu',
                                   padding='same',
                                   kernel_regularizer=l1_l2(0.00001))(inception_B_conv4_1x7)
    # Branch 2: 1x1 -> 1x7 -> 7x1.
    inception_B_conv6_1x1 = Conv2D(name="inception_B_conv6_1x1/1" + i,
                                   filters=128 // filter_control,
                                   kernel_size=(1, 1),
                                   strides=(1, 1),
                                   kernel_initializer='he_normal',
                                   activation='relu',
                                   padding='same',
                                   kernel_regularizer=l1_l2(0.00001))(input_layer)
    inception_B_conv7_1x7 = Conv2D(name="inception_B_conv7_1x7/1" + i,
                                   filters=128 // filter_control,
                                   kernel_size=(1, 7),
                                   strides=(1, 1),
                                   kernel_initializer='he_normal',
                                   activation='relu',
                                   padding='same',
                                   kernel_regularizer=l1_l2(0.00001))(inception_B_conv6_1x1)
    inception_B_conv8_7x1 = Conv2D(name="inception_B_conv8_7x1/1" + i,
                                   filters=192 // filter_control,
                                   kernel_size=(7, 1),
                                   strides=(1, 1),
                                   kernel_initializer='he_normal',
                                   activation='relu',
                                   padding='same',
                                   kernel_regularizer=l1_l2(0.00001))(inception_B_conv7_1x7)
    # Branch 3: average pool -> 1x1.
    inception_B_pool1_3x3 = AveragePooling2D(name="inception_B_pool1_3x3/1" + i,
                                             pool_size=(3, 3),
                                             strides=(1, 1),
                                             padding='same')(input_layer)
    inception_B_conv9_1x1 = Conv2D(name="inception_B_conv9_1x1/1" + i,
                                   filters=192 // filter_control,
                                   kernel_size=(1, 1),
                                   strides=(1, 1),
                                   kernel_initializer='he_normal',
                                   activation='relu',
                                   padding='same',
                                   kernel_regularizer=l1_l2(0.00001))(inception_B_pool1_3x3)
    # Branch 4: plain 1x1.
    inception_B_conv10_1x1 = Conv2D(name="inception_B_conv10_1x1/1" + i,
                                    filters=192 // filter_control,
                                    kernel_size=(1, 1),
                                    strides=(1, 1),
                                    kernel_initializer='he_normal',
                                    activation='relu',
                                    padding='same',
                                    kernel_regularizer=l1_l2(0.00001))(input_layer)
    inception_B_merge1 = concatenate(
        [inception_B_conv5_7x1, inception_B_conv8_7x1, inception_B_conv9_1x1, inception_B_conv10_1x1])
    return bn_relu(inception_B_merge1)
def inception_C(i, input_shape):
    """Inception-C block: expanded filter banks with split 1x3/3x1 outputs.

    Two branches end in parallel 1x3 and 3x1 convolutions whose outputs are
    concatenated; together with an average-pool branch and a plain 1x1,
    everything is concatenated and passed through BN+ReLU.

    Parameters:
        i: string suffix appended to every layer name so repeated blocks
            get unique names.
        input_shape: input Keras tensor (a tensor, not a shape tuple).

    Returns:
        Output tensor of the block.
    """
    input_layer = input_shape

    # All Inception-C convolutions share stride 1, 'same' padding,
    # He-normal init, ReLU and the same L1/L2 weight penalty.
    def conv(name, source, filters, kernel_size):
        return Conv2D(name=name,
                      filters=filters // filter_control,
                      kernel_size=kernel_size,
                      strides=(1, 1),
                      kernel_initializer='he_normal',
                      activation='relu',
                      padding='same',
                      kernel_regularizer=l1_l2(0.00001))(source)

    # Branch 1: 1x1 -> 3x3, then split into parallel 1x3 and 3x1.
    stem1 = conv("inception_C_conv1_1x1/1" + i, input_layer, 448, (1, 1))
    stem1 = conv("inception_C_conv2_3x3/1" + i, stem1, 384, (3, 3))
    split1a = conv("inception_C_conv3_1x3/1" + i, stem1, 384, (1, 3))
    split1b = conv("inception_C_conv4_3x1/1" + i, stem1, 384, (3, 1))
    branch1 = concatenate([split1a, split1b])
    # Branch 2: 1x1, then split into parallel 1x3 and 3x1.
    stem2 = conv("inception_C_conv5_1x1/1" + i, input_layer, 384, (1, 1))
    split2a = conv("inception_C_conv6_1x3/1" + i, stem2, 384, (1, 3))
    split2b = conv("inception_C_conv7_3x1/1" + i, stem2, 384, (3, 1))
    branch2 = concatenate([split2a, split2b])
    # Branch 3: average pool -> 1x1.
    pooled = AveragePooling2D(name="inception_C_pool1_3x3/1" + i,
                              pool_size=(3, 3),
                              strides=(1, 1),
                              padding='same')(input_layer)
    branch3 = conv("inception_C_conv8_1x1/1" + i, pooled, 192, (1, 1))
    # Branch 4: plain 1x1.
    branch4 = conv("inception_C_conv9_1x1/1" + i, input_layer, 320, (1, 1))
    merged = concatenate([branch1, branch2, branch3, branch4])
    return bn_relu(merged)
def create_inception_v3(input_shape, nb_classes=10, small_mode=False):
    """Build the reduced Inception V3 classification model.

    Stem -> 3x Inception-A -> 5x Inception-B -> 2x Inception-C ->
    global 8x8 average pool -> dropout -> softmax classifier.

    Parameters:
        input_shape: per-sample input shape tuple, e.g. (32, 32, 3).
        nb_classes: number of output classes (default 10 for CIFAR-10).
        small_mode: forwarded to before_inception; use stride 1 in the
            stem for small inputs.

    Returns:
        An uncompiled keras.models.Model named 'Inception-v3'.
    """
    input_layer = Input(input_shape)
    x = before_inception(input_layer, small_mode)
    # 3 x Inception A
    for i in range(3):
        x = inception_A(str(i), x)
    # 5 x Inception B
    for i in range(5):
        x = inception_B(str(i), x)
    # 2 x Inception C
    for i in range(2):
        x = inception_C(str(i), x)
    x = AveragePooling2D((8, 8), strides=(1, 1))(x)
    # NOTE(review): Keras Dropout takes the *drop* fraction, so 0.8 drops
    # 80% of activations. The paper's "dropout 0.8" is a keep-probability;
    # this may be intended as Dropout(0.2) -- confirm before changing.
    x = Dropout(0.8)(x)
    x = Flatten()(x)
    # FIX: pass units positionally instead of the Keras-1-only
    # `output_dim=` keyword, and use `inputs=`/`outputs=` instead of the
    # legacy `output=` keyword removed from the Keras 2 Model API.
    out = Dense(nb_classes, activation='softmax')(x)
    model = Model(inputs=input_layer, outputs=out, name='Inception-v3')
    return model
if __name__ == "__main__":
    # Pin all ops to GPU 3, both via tf.device and CUDA_VISIBLE_DEVICES.
    with tf.device('/gpu:3'):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1, allow_growth=True)
        os.environ["CUDA_VISIBLE_DEVICES"] = "3"
        # NOTE(review): this Session is created but never registered with
        # Keras (e.g. via K.set_session), so Keras may open its own default
        # session and ignore these options -- confirm.
        tf.Session(config=K.tf.ConfigProto(allow_soft_placement=True,
                                           log_device_placement=True,
                                           gpu_options=gpu_options))
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        # reorder dimensions for tensorflow
        # NOTE(review): (0, 1, 2, 3) is the identity permutation, so only
        # the /255 scaling to [0, 1] has any effect here.
        x_train = np.transpose(x_train.astype('float32') / 255., (0, 1, 2, 3))
        x_test = np.transpose(x_test.astype('float32') / 255., (0, 1, 2, 3))
        print('x_train shape:', x_train.shape)
        print(x_train.shape[0], 'train samples')
        print(x_test.shape[0], 'test samples')
        # convert class vectors to binary class matrices
        y_train = np_utils.to_categorical(y_train)
        y_test = np_utils.to_categorical(y_test)
        # Per-sample input shape, e.g. (32, 32, 3) for CIFAR-10.
        s = x_train.shape[1:]
        batch_size = 128
        nb_epoch = 10
        nb_classes = 10
        model = create_inception_v3(s, nb_classes)
        model.summary()
        plot_model(model, to_file="GoogLeNet-Inception-V3.jpg", show_shapes=True)
        model.compile(optimizer='adadelta',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        # NOTE(review): the model is trained twice -- once here without
        # augmentation (and with no callbacks) and once below with
        # fit_generator; confirm this first fit is intentional and not
        # leftover debugging code.
        model.fit(x_train, y_train,
                  batch_size=batch_size, nb_epoch=nb_epoch, verbose=1,
                  validation_data=(x_test, y_test), shuffle=True,
                  callbacks=[])
        # Model saving callback
        checkpointer = ModelCheckpoint("weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5", monitor='val_loss',
                                       verbose=0,
                                       save_best_only=False, save_weights_only=False, mode='auto')
        print('Using real-time data augmentation.')
        # Light augmentation: random shifts up to 12.5% and horizontal flips.
        datagen_train = ImageDataGenerator(
            featurewise_center=False,
            samplewise_center=False,
            featurewise_std_normalization=False,
            samplewise_std_normalization=False,
            zca_whitening=False,
            rotation_range=0,
            width_shift_range=0.125,
            height_shift_range=0.125,
            horizontal_flip=True,
            vertical_flip=False)
        datagen_train.fit(x_train)
        # Second training run with augmentation and the full callback set.
        history = model.fit_generator(datagen_train.flow(x_train, y_train, batch_size=batch_size, shuffle=True),
                                      samples_per_epoch=x_train.shape[0],
                                      nb_epoch=nb_epoch, verbose=1,
                                      validation_data=(x_test, y_test),
                                      callbacks=[lr_reducer, early_stopper, csv_logger, checkpointer])