Fashion-MNIST Image Recognition with a PyTorch CNN

Fashion-MNIST is the dataset we used earlier when introducing TensorFlow. Below we run it again with PyTorch, as an introductory PyTorch example.

Training and monitoring the deep neural network -- FashionMNIST_train.py:

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor


# Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=4, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=256)
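# Note (editorial): batch_size=4 makes each training epoch slow; larger batches
# (e.g. 64) are a common choice and also give BatchNorm more stable statistics.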


# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()

        conv1_outs = 16
        # in_channels is 1; for the first conv layer it equals the number of image channels (1 for grayscale, 3 or 4 for color)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=conv1_outs, kernel_size=(2, 2), stride=(1, 1)),  # out 16*27*27
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),  # pooling stride defaults to the kernel size  # out 16*13*13
            nn.BatchNorm2d(num_features=conv1_outs, eps=1e-5, momentum=0.1, affine=True),  # num_features must match the conv's out_channels
            nn.ReLU(inplace=True)  # does not change the size
        )
        conv2_outs = 32
        """机器学习中,进行模型训练之前,需对数据做归一化处理,使其分布一致。在深度神经网络训练过程中,通常一次训练是一个batch,而非全体数据。
        每个batch具有不同的分布产生了internal covarivate shift问题——在训练过程中,数据分布会发生变化,对下一层网络的学习带来困难。Batch 
        Normalization将数据拉回到标准正态分布上(归一化),一方面使得数据分布一致,另一方面避免梯度消失、梯度爆炸。BatchNorm2d 不改变 size"""
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=conv1_outs, out_channels=conv2_outs, kernel_size=(2, 2), stride=(1, 1)),  # out 32*12*12
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),  # pooling stride defaults to the kernel size  # out 32*6*6
            nn.BatchNorm2d(num_features=conv2_outs, eps=1e-5, momentum=0.1, affine=True),
            nn.ReLU(inplace=True)  # inplace saves memory
        )
        # self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(2, 2))

        self.flatten = nn.Flatten()

        self.fcs = nn.Sequential(
            nn.Linear(1152, 512),  # 1152 = 32*6*6; the first FC layer's input size must be computed from the conv output (see the shape-check sketch after this script)
            nn.ReLU(inplace=True),
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 10),
            nn.ReLU(inplace=True)
        )

        # self.relu = nn.ReLU()

        # nn.Sequential is optional; the layers can also be defined one by one:
        #self.fc1 = nn.Linear(1152, 512)  # 1152 = 32*6*6
        #self.fc2 = nn.Linear(512, 512)
        #self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.flatten(x)  # flattening keeps the first (batch) dimension and flattens the rest, yielding a 2-D tensor
        x = self.fcs(x)

        """nn.ReLu is a class, not a function. When you do x=nn.ReLu(x) you are instantiating the class nn.ReLu, 
        not computing a relu. You can either replace nn.ReLu by it’s corresponding nn.functional.relu or to instantiate
         the activation in the init:self.relu=nn.ReLu() and replace x = nn.ReLU(x) by ``x=self.relu(x)"""
        # x = self.relu(self.fc1(x))
        # x = self.relu(self.fc2(x))
        # x = self.relu(self.fc3(x))

        return x


def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)

    train_loss, correct = 0, 0
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()  # accumulate training accuracy (monitoring only)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()  # for monitoring only
        # if batch % 100 == 0:  # per-batch training monitor
            # loss, current = loss.item(), batch * len(X)
            # print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
    train_loss /= num_batches
    correct /= size

    train_Avg_loss.append(train_loss)
    train_Accuracy.append(100*correct)
    print(f"training Accuracy: {(100*correct):>0.1f}")


def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size

    test_Avg_loss.append(test_loss)
    test_Accuracy.append(100*correct)

    print(f"Test Accuracy: {(100*correct):>0.1f}%, Test Avg loss: {test_loss:>8f}\n")


if __name__ == "__main__":
    for X, y in test_dataloader:
        print(f"Shape of X [N, C, H, W]: {X.shape}")
        print(f"Shape of y: {y.shape} {y.dtype}")
        break


    # Get cpu or gpu device for training.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using {device} device")
    # device = "cpu"
    model = NeuralNetwork().to(device)
    print(model)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
    # Dynamic learning rate: after every step_size epochs, lr *= gamma.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    from matplotlib import pyplot as plt
    from matplotlib import ticker
    train_Accuracy = []
    train_Avg_loss = []
    test_Accuracy = []
    test_Avg_loss = []
    epochs = 40
    for t in range(epochs):
        print(f"Epoch {t+1}:")
        print(f"current learning rate is {scheduler.get_last_lr()[0]}")
        train(train_dataloader, model, loss_fn, optimizer)
        scheduler.step()
        test(test_dataloader, model, loss_fn)
    print("Done.")

    torch.save(model.state_dict(), "FashionMNIST_model.pth")
    print("Saved PyTorch Model State to FashionMNIST_model.pth")

    # Plot accuracy and average loss
    plt.subplot(2, 1, 1)
    plt.plot(range(1, epochs+1), train_Accuracy, "r-", label="train_Accuracy")
    plt.plot(range(1, epochs+1), test_Accuracy, "b-", label="test_Accuracy")
    plt.xlabel("Epoch")
    xticker_formatter = ticker.FuncFormatter(lambda x, pos: "%d" % x)
    plt.gca().xaxis.set_major_formatter(xticker_formatter)
    plt.ylabel("Accuracy[%]")
    plt.legend()
    plt.grid()

    plt.subplot(2, 1, 2)
    plt.plot(range(1, epochs+1), train_Avg_loss, "r-", label="train_Avg_loss")
    plt.plot(range(1, epochs+1), test_Avg_loss, "b-", label="test_Avg_loss")
    plt.xlabel("Epoch")
    plt.gca().xaxis.set_major_formatter(xticker_formatter)
    plt.ylabel("Avg_loss")
    plt.legend()
    plt.grid()

    plt.savefig("Accuracy and loss plot.png")
    plt.show()
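
The 1152 fed to the first fully connected layer need not be worked out by hand. A minimal standalone sketch (rebuilding just the conv/pool stack with the same hyperparameters as above) derives it with a dummy forward pass:

import torch
from torch import nn

# Same conv/pool hyperparameters as conv1 and conv2 in the model above.
conv_stack = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=(2, 2)),   # 1*28*28  -> 16*27*27
    nn.MaxPool2d(kernel_size=(2, 2)),       # 16*27*27 -> 16*13*13
    nn.Conv2d(16, 32, kernel_size=(2, 2)),  # 16*13*13 -> 32*12*12
    nn.MaxPool2d(kernel_size=(2, 2)),       # 32*12*12 -> 32*6*6
)
dummy = torch.zeros(1, 1, 28, 28)  # one Fashion-MNIST-sized image
print(conv_stack(dummy).flatten(1).shape[1])  # 1152 = 32*6*6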

The network in this code consists of two convolutional blocks (each with convolution, pooling, batch normalization and ReLU) and one fully connected block (three linear layers with ReLU):

NeuralNetwork(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(2, 2), stride=(1, 1))
    (1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU(inplace=True)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(2, 2), stride=(1, 1))
    (1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU(inplace=True)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fcs): Sequential(
    (0): Linear(in_features=1152, out_features=512, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU(inplace=True)
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU(inplace=True)
  )
)
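
The docstring above promised a small demo of the BatchNorm2d layers. A self-contained sketch (the 16 channels and 13x13 spatial size mirror conv1's output here, but any shape works) showing that in training mode each channel is normalized to roughly zero mean and unit variance:

import torch
from torch import nn

bn = nn.BatchNorm2d(num_features=16)
bn.train()  # training mode: normalize with the current batch's own statistics
x = torch.randn(8, 16, 13, 13) * 5 + 3  # a batch of shifted, scaled activations
y = bn(x)
print(y.mean(dim=(0, 2, 3)))  # per-channel means, all close to 0
print(y.std(dim=(0, 2, 3)))   # per-channel stds, all close to 1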

After 30 epochs of training, the accuracy on the test set reaches 98.0%.

Calling the trained network to classify images:

import torch
from FashionMNIST_train import NeuralNetwork, test_data

model = NeuralNetwork()
model.load_state_dict(torch.load("FashionMNIST_model.pth"))

classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
with torch.no_grad():
    for i in range(20):  # predict the first 20 images in the test set
        x, y = test_data[i][0], test_data[i][1]
        
        x = torch.unsqueeze(x, dim=0)  # Variable is deprecated; a plain tensor suffices under torch.no_grad()
        # Promote 3-D to 4-D by adding a batch dimension. A fully connected net would not need this, but with conv
        # layers omitting it raises an error like: Expected 4-dimensional input for 4-dimensional weight [16, 1, 2, 2],
        # but got 3-dimensional input of size [1, 28, 28] instead
        
        pred = model(x)
        predicted, actual = classes[pred[0].argmax(0)], classes[y]
        print(f'Predicted: "{predicted}", Actual: "{actual}"')   
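
To get a confidence score along with the top label, the network's raw outputs can be passed through softmax. A short sketch continuing the script above (it reuses model, test_data and classes; the functional import is the only addition):

import torch.nn.functional as F

with torch.no_grad():
    x = torch.unsqueeze(test_data[0][0], dim=0)  # first test image, with batch dim
    probs = F.softmax(model(x)[0], dim=0)  # raw outputs -> class probabilities
    conf, idx = probs.max(dim=0)
    print(f'Predicted: "{classes[idx.item()]}" with {conf.item():.1%} confidence')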