# 深度学习入门数据集--2.fashion-mnist数据集

Fashion-MNIST是一位老师推荐给我的，要求我在做完MNIST-handwriting数据之后，再玩一下fmnist。这个数据集也是28*28的灰度图像，你可以从下面的图片看清图片内容。标签也是10类：T恤、裤子、套衫、裙子、外套、凉鞋、汗衫、运动鞋、包、踝靴。这个数据集是由一家德国的时尚科技公司Zalando提供的，我觉得一家公司把自己定位成时尚科技公司，而不是电商平台，是把科技创新能力作为主要生产力。未来，会有更多如此定位的公司：医疗科技公司、美食科技公司、出行科技公司、建筑科技公司等。本文主要用Keras编写模型，训练数据，并以清晰的可视化方式展示。

## 查看数据

```import numpy
import matplotlib.pyplot as plt
import gzip

IMAGE_SIZE = 28
NUM_CHANNELS = 1
PIXEL_DEPTH = 255
NUM_LABELS = 10
def extract_data(filename, num_images):
print('Extracting', filename)
with gzip.open(filename) as bytestream:
buf = bytestream.read(IMAGE_SIZE * IMAGE_SIZE * num_images * NUM_CHANNELS)
data = numpy.frombuffer(buf, dtype=numpy.uint8).astype(numpy.float32)
data = numpy.reshape(data, [num_images, -1])
return data
def extract_labels(filename, num_images):
"""Extract the labels into a vector of int64 label IDs."""
print('Extracting', filename)
with gzip.open(filename) as bytestream:
labels = numpy.frombuffer(buf, dtype=numpy.uint8).astype(numpy.int64)
return labels

path= '/Users/wangsen/ai/13/homework/fmnist'
train_data = extract_data(path+'/train-images-idx3-ubyte.gz', 60000)
train_labels = extract_labels(path+'/train-labels-idx1-ubyte.gz', 60000)
test_data = extract_data(path+'/t10k-images-idx3-ubyte.gz', 10000)
test_labels = extract_labels(path+'/t10k-labels-idx1-ubyte.gz', 10000)
labels = ['T恤','裤子','套衫','裙子','外套','凉鞋','汗衫','运动鞋','包','踝靴']

pic = train_data[10]
pic = pic.reshape(28,28)
k = 0
fig, axes1 = plt.subplots(10,10,figsize=(6,7.5))
for i in range(10):
for j in range(10):
axes1[i][j].set_axis_off()
pic = train_data[k]
pic = pic.reshape(28,28)
axes1[i][j].imshow(pic,cmap='gray')
axes1[i][j].set_title(labels[train_labels[k]],fontsize=9)
k = k+1
plt.show()```

## Keras 构造Model

```import numpy as np
import matplotlib.pyplot as plt
import gzip
import os

import numpy as np

paths = [
'/Users/wangsen/ai/13/homework/fmnist/train-labels-idx1-ubyte.gz', '/Users/wangsen/ai/13/homework/fmnist/train-images-idx3-ubyte.gz',
'/Users/wangsen/ai/13/homework/fmnist/t10k-labels-idx1-ubyte.gz', '/Users/wangsen/ai/13/homework/fmnist/t10k-images-idx3-ubyte.gz'
]

with gzip.open(paths[0], 'rb') as lbpath:

with gzip.open(paths[1], 'rb') as imgpath:
x_train = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28)

with gzip.open(paths[2], 'rb') as lbpath:

with gzip.open(paths[3], 'rb') as imgpath:
x_test = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28)
print("x_train shape:", x_train.shape, "y_train shape:", y_train.shape)

# Print the number of training and test datasets
print(x_train.shape[0], 'train set')
print(x_test.shape[0], 'test set')

# Define the text labels
# fashion_mnist_labels = ["T-shirt/top",  # index 0
#                         "Trouser",      # index 1
#                         "Pullover",     # index 2
#                         "Dress",        # index 3
#                         "Coat",         # index 4
#                         "Sandal",       # index 5
#                         "Shirt",        # index 6
#                         "Sneaker",      # index 7
#                         "Bag",          # index 8
#                         "Ankle boot"]   # index 9
fashion_mnist_labels=['T恤','裤子','套衫','裙子','外套','凉鞋','汗衫','运动鞋','包','踝靴']
# Image index, you can pick any number between 0 and 59,999
img_index = 5
# y_train contains the lables, ranging from 0 to 9
label_index = y_train[img_index]
# Print the label, for example 2 Pullover
print ("y = " + str(label_index) + " " +(fashion_mnist_labels[label_index]))
# # Show one of the images from the training dataset

x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Further break training data into train / validation sets (# put 5000 into validation set and keep remaining 55,000 for train)
(x_train, x_valid) = x_train[5000:], x_train[:5000]
(y_train, y_valid) = y_train[5000:], y_train[:5000]

# Reshape input data from (28, 28) to (28, 28, 1)
w, h = 28, 28
x_train = x_train.reshape(x_train.shape[0], w, h, 1)
x_valid = x_valid.reshape(x_valid.shape[0], w, h, 1)
x_test = x_test.reshape(x_test.shape[0], w, h, 1)

# One-hot encode the labels
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_valid = tf.keras.utils.to_categorical(y_valid, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Print training set shape
print("x_train shape:", x_train.shape, "y_train shape:", y_train.shape)

# Print the number of training, validation, and test datasets
print(x_train.shape[0], 'train set')
print(x_valid.shape[0], 'validation set')
print(x_test.shape[0], 'test set')

model = tf.keras.Sequential()

# Must define the input shape in the first layer of the neural network

# Take a look at the model summary
model.summary()

model.compile(loss='categorical_crossentropy',
metrics=['accuracy'])

from keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(filepath='model.weights.best.hdf5', verbose = 1, save_best_only=True)
model.fit(x_train,
y_train,
batch_size=64,
epochs=10,
validation_data=(x_valid, y_valid),
callbacks=[checkpointer])
# Evaluate the model on test set
score = model.evaluate(x_test, y_test, verbose=0)```
### 测试

从代码可以看出，用Keras编写代码，建模、保存参数等代码量都较小。最后的结果显示，15个例子中有2个错误例子，一个“T恤”识别成“包”，一个把“汗衫”识别成“外套”。

```import tensorflow as tf

import numpy as np
import matplotlib.pyplot as plt
import gzip
import os

import numpy as np

paths = [
'/Users/wangsen/ai/13/homework/fmnist/train-labels-idx1-ubyte.gz', '/Users/wangsen/ai/13/homework/fmnist/train-images-idx3-ubyte.gz',
'/Users/wangsen/ai/13/homework/fmnist/t10k-labels-idx1-ubyte.gz', '/Users/wangsen/ai/13/homework/fmnist/t10k-images-idx3-ubyte.gz'
]

with gzip.open(paths[0], 'rb') as lbpath:

with gzip.open(paths[1], 'rb') as imgpath:
x_train = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28)

with gzip.open(paths[2], 'rb') as lbpath:

with gzip.open(paths[3], 'rb') as imgpath:
x_test = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28)
print("x_train shape:", x_train.shape, "y_train shape:", y_train.shape)

# Print the number of training and test datasets
print(x_train.shape[0], 'train set')
print(x_test.shape[0], 'test set')
fashion_mnist_labels=['T恤','裤子','套衫','裙子','外套','凉鞋','汗衫','运动鞋','包','踝靴']
# Image index, you can pick any number between 0 and 59,999
img_index = 5
# y_train contains the lables, ranging from 0 to 9
label_index = y_train[img_index]
# Print the label, for example 2 Pullover
print ("y = " + str(label_index) + " " +(fashion_mnist_labels[label_index]))
# # Show one of the images from the training dataset

x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Further break training data into train / validation sets (# put 5000 into validation set and keep remaining 55,000 for train)
(x_train, x_valid) = x_train[5000:], x_train[:5000]
(y_train, y_valid) = y_train[5000:], y_train[:5000]

# Reshape input data from (28, 28) to (28, 28, 1)
w, h = 28, 28
x_train = x_train.reshape(x_train.shape[0], w, h, 1)
x_valid = x_valid.reshape(x_valid.shape[0], w, h, 1)
x_test = x_test.reshape(x_test.shape[0], w, h, 1)

# One-hot encode the labels
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_valid = tf.keras.utils.to_categorical(y_valid, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Print training set shape
print("x_train shape:", x_train.shape, "y_train shape:", y_train.shape)

# Print the number of training, validation, and test datasets
print(x_train.shape[0], 'train set')
print(x_valid.shape[0], 'validation set')
print(x_test.shape[0], 'test set')

model = tf.keras.Sequential()

# Must define the input shape in the first layer of the neural network

from keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(filepath='model.weights.best.hdf5', verbose = 1, save_best_only=True)

y_hat = model.predict(x_test)

figure = plt.figure(figsize=(20, 8))
for i, index in enumerate(np.random.choice(x_test.shape[0], size=15, replace=False)):
ax = figure.add_subplot(3, 5, i + 1, xticks=[], yticks=[])
# Display each image
ax.imshow(np.squeeze(x_test[index]))
predict_index = np.argmax(y_hat[index])
true_index = np.argmax(y_test[index])
# Set the title for each image
ax.set_title("{} ({})".format(fashion_mnist_labels[predict_index],
fashion_mnist_labels[true_index]),
color=("green" if predict_index == true_index else "red"))
plt.show()```

0 条评论

• ### 机器学习入门数据集--4.泰坦尼克幸存者预测

最后只选取8个维度 Pclass Age SibSp Parch Sex Cabin Fare Embarked。dum...

• ### Python 传值还是传引用

如果 node =None，相当于node指向一个不可变对象，在调用insert函数时，仅传值。

• ### 腾讯云--GPU训练cifar10

腾讯云比阿里云的GPU服务器更多一些，在阿里云上有时会出现没有GPU服务器或者售罄。

• ### 使用LSTM模型预测股价基于Keras

编者按：本文介绍了如何使用LSTM模型进行时间序列预测。股票市场的数据由于格式规整和非常容易获得，是作为研究的很好选择。但不要把本文的结论当作理财或交易建议。

• ### 基于RNN和LSTM的股市预测方法

对许多研究人员和分析师来说，预测股价的艺术一直是一项艰巨的任务。事实上，投资者对股票价格预测的研究领域非常感兴趣。许多投资者都渴望知道股票市场的未来情况。良好和...

• ### 【DS】利用Keras长短期记忆(LSTM)模型预测股票价格

在本教程中，我们将构建一个Python深度学习模型，用于预测股票价格的未来行为。我们假设读者熟悉Python中的深度学习概念，特别是LSTM。

• ### 独家 | 教你使用简单神经网络和LSTM进行时间序列预测（附代码）

下载波动性标准普尔500数据集，时间范围是：2011年2月11日至2019年2月11日。我的目标是采用ANN和LSTM来预测波动性标准普尔500时间序列。

• ### 动手学深度学习(五) 梯度消失、梯度爆炸

深度模型有关数值稳定性的典型问题是消失（vanishing）和爆炸（explosion）。

• ### 机器学习-文本分类（2）-新闻文本分类

参考：https://mp.weixin.qq.com/s/6vkz18Xw4USZ3fldd_wf5g

• ### 基于机器学习的文本分类！

据不完全统计，网民们平均每人每周收到的垃圾邮件高达10封左右。垃圾邮件浪费网络资源的同时，还消耗了我们大量的时间。大家对此深恶痛绝，于是识别垃圾邮件并对其进行过...