from keras.datasets import boston_housing
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()
Using TensorFlow backend.
# Inspect the data
train_data.shape
# 404 samples with 13 features each
(404, 13)
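For orientation, the test split and the targets can be inspected the same way; the targets are median home prices in thousands of dollars (the 102/102 in the evaluation output at the end confirms the test-set size).
test_data.shape    # (102, 13): 102 test samples with the same 13 features
train_targets[:5]  # a few prices; values run roughly from 5 to 50, i.e. $5,000-$50,000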
# Standardize the data
# Rescale each feature to mean 0 and standard deviation 1 (feature-wise standardization)
mean = train_data.mean(axis=0)  # column-wise mean
train_data -= mean              # equivalent to train_data = train_data - mean
std = train_data.std(axis=0)    # column-wise standard deviation
train_data /= std               # divide by the standard deviation
# note: the test set is scaled with statistics computed on the training data
test_data -= mean
test_data /= std
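The same feature-wise standardization can be done with scikit-learn's StandardScaler, which keeps the fit-on-train / transform-both discipline explicit; a minimal sketch, assuming scikit-learn is installed:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# fit the scaler on the training data only, then apply it to both splits
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)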
# Build the network
# Model definition
from keras import models
from keras import layers

def build_model():  # wrap construction in a function so a fresh model can be built for each fold
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu',
                           input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))  # single linear output unit for scalar regression
    # MSE loss; the metric is mean absolute error (absolute difference between prediction and target)
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model  # return the compiled model
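As a quick sanity check before the validation loop, one instance can be built and its layer shapes printed (model.summary() is standard Keras):
build_model().summary()  # shows the two Dense(64) hidden layers plus the single-unit output layer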
# K-fold validation
import numpy as np

k = 4  # four folds
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []  # holds the validation score of each fold
for i in range(k):  # run the train/validate cycle k times
    print('processing fold #', i)  # progress indicator
    # hold out the validation partition for this fold
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]
    # assemble the training partition:
    # np.concatenate joins everything outside the validation slice
    partial_train_data = np.concatenate(
        [train_data[:i * num_val_samples],
         train_data[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:i * num_val_samples],
         train_targets[(i + 1) * num_val_samples:]],
        axis=0)
    model = build_model()  # a fresh model for every fold
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, batch_size=1, verbose=0)
    # evaluate on the held-out fold
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)  # collect this fold's validation MAE

# average val_mae across the folds
np.mean(all_scores)
processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
2.4407541155815125
On average the predictions are off by about 2.44; since prices are in thousands of dollars, that is roughly $2,440.
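The manual slicing above is exactly what scikit-learn's KFold splitter automates; a hedged sketch of the same loop, using shuffle=False so the contiguous folds match the manual slices:
from sklearn.model_selection import KFold

kf_scores = []
for train_idx, val_idx in KFold(n_splits=k, shuffle=False).split(train_data):
    m = build_model()
    m.fit(train_data[train_idx], train_targets[train_idx],
          epochs=num_epochs, batch_size=1, verbose=0)
    # evaluate() returns [loss, mae]; keep the MAE
    kf_scores.append(m.evaluate(train_data[val_idx], train_targets[val_idx], verbose=0)[1])
np.mean(kf_scores)  # should land close to the value above, up to training randomness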
# Tune the training schedule
num_epochs = 500  # this time train for 500 epochs
all_mae_histories = []  # holds the per-epoch validation MAE of every fold
for i in range(k):
    print('processing fold #', i)
    # same fold preparation as before
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]
    partial_train_data = np.concatenate(
        [train_data[:i * num_val_samples],
         train_data[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:i * num_val_samples],
         train_targets[(i + 1) * num_val_samples:]],
        axis=0)
    model = build_model()
    history = model.fit(partial_train_data, partial_train_targets,
                        validation_data=(val_data, val_targets),
                        epochs=num_epochs, batch_size=1, verbose=0)
    # keep the full validation-MAE history of this fold
    mae_history = history.history['val_mae']
    all_mae_histories.append(mae_history)  # one list of 500 values per fold
processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
# Compute the average of the per-fold MAE for each epoch
# a list comprehension: for each epoch, average the MAE across the k folds
average_mae_history = [
    np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]
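Because every fold history has the same length, the comprehension collapses to a single NumPy reduction:
average_mae_history = np.mean(all_mae_histories, axis=0)  # average over the fold axis: (4, 500) -> (500,)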
# Plot the result
import matplotlib.pyplot as plt
plt.plot(range(1, len(average_mae_history) + 1), average_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()
# Validation MAE decreases as the number of epochs increases
# To find the best stopping epoch, zoom in on the plot above
# and look for the minimum
# this function smooths the curve
def smooth_curve(points, factor=0.9):
    smoothed_points = []
    for point in points:
        if smoothed_points:
            previous = smoothed_points[-1]
            smoothed_points.append(previous * factor + point * (1 - factor))
        else:
            smoothed_points.append(point)
    return smoothed_points
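smooth_curve is an exponential moving average: s[t] = factor * s[t-1] + (1 - factor) * x[t], with s[0] = x[0]. If pandas happens to be available (an assumption about the environment, nothing below depends on it), the same numbers come out of its ewm helper:
import pandas as pd
# same values as smooth_curve(average_mae_history, factor=0.9):
# alpha = 1 - factor, and adjust=False keeps the recursive update above
pd.Series(average_mae_history).ewm(alpha=0.1, adjust=False).mean()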
# the first 10 points are on a much larger scale than the rest, so drop them
smooth_mae_history = smooth_curve(average_mae_history[10:])
plt.plot(range(1, len(smooth_mae_history) + 1), smooth_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()
The plot suggests that about 45 epochs is a good choice.
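Rather than eyeballing the plot, the minimum can be read off the smoothed curve directly (adding back the 10 dropped points and 1 for one-based epoch numbering):
best_epoch = int(np.argmin(smooth_mae_history)) + 10 + 1
print('lowest smoothed validation MAE at epoch', best_epoch)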
# Rebuild and train a final model on all the training data
model = build_model()
# 45 epochs turned out slightly worse than 50 here, probably run-to-run
# randomness; see the seeding sketch after the final evaluation
model.fit(train_data, train_targets,
          epochs=50, batch_size=16, verbose=0)
test_mse_score, test_mae_score = model.evaluate(test_data, test_targets)
test_mae_score
# final test-set result: the predictions are off from the actual prices by about $2,837 on average
102/102 [==============================] - 0s 122us/step
2.837068557739258
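On the seeding question from the comment above: run-to-run variation comes from random weight initialization and shuffling, and can be reduced by seeding every generator involved before building the model (GPU kernels may still introduce small nondeterminism); a sketch for a TensorFlow backend:
import random
import numpy as np
import tensorflow as tf

random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)  # on TensorFlow 1.x this call is tf.set_random_seed(42)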
These three deep-learning posts cover the basic material of neural networks. The snake bites its own tail: an ending is also a beginning, and later posts will go deeper.
peace&love