Keras 提供了非常便捷的图片预处理类 ImageDataGenerator。这个图片生成器每次生成一个 batch 的数据,并支持实时的数据增强(data augmentation);训练时它会无限循环地生成数据,直到达到设定的 epoch 次数才停止。
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from glob import glob
import matplotlib.pyplot as plt
import cv2
import numpy as np
# Image generator: each image it yields is a randomly transformed copy of the
# input, drawn from the augmentation settings below.
augmentation_options = {
    'rotation_range': 40,        # random rotations, range given in degrees
    'width_shift_range': 0.2,    # horizontal shift, as a fraction of the width
    'height_shift_range': 0.2,   # vertical shift, as a fraction of the height
    'rescale': 1. / 255,         # multiply every pixel value by 1/255
    'shear_range': 0.2,          # random shear intensity
    'zoom_range': 0.2,           # random zoom range
    'horizontal_flip': True,     # randomly mirror images left/right
    'fill_mode': 'nearest',      # how pixels exposed by a transform are filled
}
datagen = ImageDataGenerator(**augmentation_options)

# Show the source picture before any transformation is applied.
source_path = 'dog-project-master/kaggle/train/cat/cat.4.jpg'
img = load_img(source_path)
plt.imshow(img)
plt.title('Before:')
plt.show()
# Convert the image to an array and add a leading batch axis, because flow()
# consumes batches of images. x becomes (1, height, width, channels) --
# assuming the default channels_last data format.
x = img_to_array(img)
x = x.reshape((1,) + x.shape)

# The images are saved to and read back from the same variable, so the two
# paths cannot drift apart.
# NOTE(review): flow() is not expected to create this directory -- make sure
# it exists before running (confirm against the Keras version in use).
save_dir = 'E://udacity-dl/transform_cat'
num_samples = 3

# flow() yields batches indefinitely, so draw exactly num_samples batches by
# hand. Each batch drawn is written to save_dir as a side effect, which is
# the only reason the batches are generated here.
augmented = datagen.flow(x, batch_size=1, save_to_dir=save_dir,
                         save_prefix='cat', save_format='png')
for _ in range(num_samples):
    next(augmented)

# Show the transformed images that were written to disk. Sorting makes the
# display order deterministic (glob() returns paths in arbitrary order).
# NOTE(review): files left in save_dir by earlier runs are picked up too.
cat_images = sorted(glob(save_dir + '/*'))
for index, path in enumerate(cat_images[:num_samples], start=1):
    # cv2.imread returns channels in BGR order while matplotlib expects RGB;
    # convert so the colours are not swapped in the plot.
    image_bgr = cv2.imread(path)
    plt.imshow(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
    plt.title("After%d" % index)
    plt.show()
1)和.flow()配合使用
# NOTE(review): cifar10, np_utils, num_classes, model and epochs are not
# defined in the visible part of this file; presumably they come from the
# surrounding Keras setup -- confirm before running.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Convert the integer class vectors to binary class matrices.
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
datagen.fit(x_train)

batch_size = 32
# steps_per_epoch counts batches, not samples: ceiling division gives the
# number of batches needed to see every training sample once, including a
# final partial batch.
steps_per_epoch = (len(x_train) + batch_size - 1) // batch_size

# fits the model on batches with real-time data augmentation:
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=steps_per_epoch, epochs=epochs)

# here's a more "manual" example
for e in range(epochs):
    print('Epoch', e)
    batches = 0
    for x_batch, y_batch in datagen.flow(x_train, y_train,
                                         batch_size=batch_size):
        # Single gradient update on one augmented batch.
        loss = model.train_on_batch(x_batch, y_batch)
        batches += 1
        if batches >= steps_per_epoch:
            # we need to break the loop by hand because
            # the generator loops indefinitely
            break
2)和.flow_from_directory() 配合使用
# Training images are rescaled and randomly augmented; validation images are
# only rescaled, so evaluation runs on untransformed pictures.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Both directory iterators share the same image size, batch size and label
# mode, so the common options are declared once.
directory_flow_options = {
    'target_size': (150, 150),
    'batch_size': 32,
    'class_mode': 'binary',
}
train_generator = train_datagen.flow_from_directory(
    'data/train', **directory_flow_options)
validation_generator = test_datagen.flow_from_directory(
    'data/validation', **directory_flow_options)

# Train from the directory iterators, validating after each epoch.
model.fit_generator(
    train_generator,
    steps_per_epoch=2000,
    epochs=50,
    validation_data=validation_generator,
    validation_steps=800,
)