Fashion-MNIST Image Recognition with a PyTorch CNN

Fashion-MNIST is the dataset we used earlier when introducing TensorFlow. Below we run it again with PyTorch, as an introductory PyTorch example.

Training and monitoring the deep neural network -- FashionMNIST_train.py:

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor


# Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=4, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=256)
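# Note (editorial): batch_size=4 makes each training epoch slow; larger batches
# (e.g. 64) are a common choice and also give BatchNorm more stable statistics.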


# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()

        conv1_outs = 16
        # in_channels is 1; for the first conv layer it equals the number of image channels (1 for grayscale, 3 or 4 for color)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=conv1_outs, kernel_size=(2, 2), stride=(1, 1)),  # out 16*27*27
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),  # pooling stride defaults to the kernel size  # out 16*13*13
            nn.BatchNorm2d(num_features=conv1_outs, eps=1e-5, momentum=0.1, affine=True),  # num_features must match the conv's out_channels
            nn.ReLU(inplace=True)  # does not change the size
        )
        conv2_outs = 32
        """机器学习中,进行模型训练之前,需对数据做归一化处理,使其分布一致。在深度神经网络训练过程中,通常一次训练是一个batch,而非全体数据。
        每个batch具有不同的分布产生了internal covarivate shift问题——在训练过程中,数据分布会发生变化,对下一层网络的学习带来困难。Batch 
        Normalization将数据拉回到标准正态分布上(归一化),一方面使得数据分布一致,另一方面避免梯度消失、梯度爆炸。BatchNorm2d 不改变 size"""
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=conv1_outs, out_channels=conv2_outs, kernel_size=(2, 2), stride=(1, 1)),  # out 32*12*12
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),  # pooling stride defaults to the kernel size  # out 32*6*6
            nn.BatchNorm2d(num_features=conv2_outs, eps=1e-5, momentum=0.1, affine=True),
            nn.ReLU(inplace=True)  # inplace saves memory
        )
        # self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(2, 2))

        self.flatten = nn.Flatten()

        self.fcs = nn.Sequential(
            nn.Linear(1152, 512),  # 1152 = 32*6*6; the first FC layer's input size must be computed from the conv output (see the shape-check sketch after this script)
            nn.ReLU(inplace=True),
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 10),
            nn.ReLU(inplace=True)
        )

        # self.relu = nn.ReLU()

        # nn.Sequential is optional; the layers can also be defined one by one:
        #self.fc1 = nn.Linear(1152, 512)  # 1152 = 32*6*6
        #self.fc2 = nn.Linear(512, 512)
        #self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.flatten(x)  # flattening keeps the first (batch) dimension and flattens the rest, yielding a 2-D tensor
        x = self.fcs(x)

        """nn.ReLu is a class, not a function. When you do x=nn.ReLu(x) you are instantiating the class nn.ReLu, 
        not computing a relu. You can either replace nn.ReLu by it’s corresponding nn.functional.relu or to instantiate
         the activation in the init:self.relu=nn.ReLu() and replace x = nn.ReLU(x) by ``x=self.relu(x)"""
        # x = self.relu(self.fc1(x))
        # x = self.relu(self.fc2(x))
        # x = self.relu(self.fc3(x))

        return x


def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)

    train_loss, correct = 0, 0
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()  # accumulate training accuracy (monitoring only)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()  # for monitoring only
        # if batch % 100 == 0:  # per-batch training monitor
            # loss, current = loss.item(), batch * len(X)
            # print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
    train_loss /= num_batches
    correct /= size

    train_Avg_loss.append(train_loss)
    train_Accuracy.append(100*correct)
    print(f"training Accuracy: {(100*correct):>0.1f}")


def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size

    test_Avg_loss.append(test_loss)
    test_Accuracy.append(100*correct)

    print(f"Test Accuracy: {(100*correct):>0.1f}%, Test Avg loss: {test_loss:>8f}\n")


if __name__ == "__main__":
    for X, y in test_dataloader:
        print(f"Shape of X [N, C, H, W]: {X.shape}")
        print(f"Shape of y: {y.shape} {y.dtype}")
        break


    # Get cpu or gpu device for training.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using {device} device")
    # device = "cpu"
    model = NeuralNetwork().to(device)
    print(model)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
    # Dynamic learning rate: after every step_size epochs, lr *= gamma.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    from matplotlib import pyplot as plt
    from matplotlib import ticker
    train_Accuracy = []
    train_Avg_loss = []
    test_Accuracy = []
    test_Avg_loss = []
    epochs = 40
    for t in range(epochs):
        print(f"Epoch {t+1}:")
        print(f"current learning rate is {scheduler.get_last_lr()[0]}")
        train(train_dataloader, model, loss_fn, optimizer)
        scheduler.step()
        test(test_dataloader, model, loss_fn)
    print("Done.")

    torch.save(model.state_dict(), "FashionMNIST_model.pth")
    print("Saved PyTorch Model State to FashionMNIST_model.pth")

    # Plot accuracy and average loss
    plt.subplot(2, 1, 1)
    plt.plot(range(1, epochs+1), train_Accuracy, "r-", label="train_Accuracy")
    plt.plot(range(1, epochs+1), test_Accuracy, "b-", label="test_Accuracy")
    plt.xlabel("Epoch")
    xticker_formatter = ticker.FuncFormatter(lambda x, pos: "%d" % x)
    plt.gca().xaxis.set_major_formatter(xticker_formatter)
    plt.ylabel("Accuracy[%]")
    plt.legend()
    plt.grid()

    plt.subplot(2, 1, 2)
    plt.plot(range(1, epochs+1), train_Avg_loss, "r-", label="train_Avg_loss")
    plt.plot(range(1, epochs+1), test_Avg_loss, "b-", label="test_Avg_loss")
    plt.xlabel("Epoch")
    plt.gca().xaxis.set_major_formatter(xticker_formatter)
    plt.ylabel("Avg_loss")
    plt.legend()
    plt.grid()

    plt.savefig("Accuracy and loss plot.png")
    plt.show()
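
The 1152 fed to the first fully connected layer need not be worked out by hand. A minimal standalone sketch (rebuilding just the conv/pool stack with the same hyperparameters as above) derives it with a dummy forward pass:

import torch
from torch import nn

# Same conv/pool hyperparameters as conv1 and conv2 in the model above.
conv_stack = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=(2, 2)),   # 1*28*28  -> 16*27*27
    nn.MaxPool2d(kernel_size=(2, 2)),       # 16*27*27 -> 16*13*13
    nn.Conv2d(16, 32, kernel_size=(2, 2)),  # 16*13*13 -> 32*12*12
    nn.MaxPool2d(kernel_size=(2, 2)),       # 32*12*12 -> 32*6*6
)
dummy = torch.zeros(1, 1, 28, 28)  # one Fashion-MNIST-sized image
print(conv_stack(dummy).flatten(1).shape[1])  # 1152 = 32*6*6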

The network in this code consists of two convolutional blocks (each with convolution, pooling, batch normalization and ReLU) and one fully connected block (three linear layers with ReLU):

NeuralNetwork(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(2, 2), stride=(1, 1))
    (1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU(inplace=True)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(2, 2), stride=(1, 1))
    (1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU(inplace=True)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fcs): Sequential(
    (0): Linear(in_features=1152, out_features=512, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU(inplace=True)
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU(inplace=True)
  )
)
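
The docstring above promised a small demo of the BatchNorm2d layers. A self-contained sketch (the 16 channels and 13x13 spatial size mirror conv1's output here, but any shape works) showing that in training mode each channel is normalized to roughly zero mean and unit variance:

import torch
from torch import nn

bn = nn.BatchNorm2d(num_features=16)
bn.train()  # training mode: normalize with the current batch's own statistics
x = torch.randn(8, 16, 13, 13) * 5 + 3  # a batch of shifted, scaled activations
y = bn(x)
print(y.mean(dim=(0, 2, 3)))  # per-channel means, all close to 0
print(y.std(dim=(0, 2, 3)))   # per-channel stds, all close to 1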

After 30 epochs of training, the accuracy on the test set reaches 98.0%.

Calling the trained network to classify images:

import torch
from FashionMNIST_train import NeuralNetwork, test_data

model = NeuralNetwork()
model.load_state_dict(torch.load("FashionMNIST_model.pth"))

classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
with torch.no_grad():
    for i in range(20):  # predict the first 20 images in the test set
        x, y = test_data[i][0], test_data[i][1]
        
        x = torch.unsqueeze(x, dim=0)  # Variable is deprecated; a plain tensor suffices under torch.no_grad()
        # Promote 3-D to 4-D by adding a batch dimension. A fully connected net would not need this, but with conv
        # layers omitting it raises an error like: Expected 4-dimensional input for 4-dimensional weight [16, 1, 2, 2],
        # but got 3-dimensional input of size [1, 28, 28] instead
        
        pred = model(x)
        predicted, actual = classes[pred[0].argmax(0)], classes[y]
        print(f'Predicted: "{predicted}", Actual: "{actual}"')   
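
To get a confidence score along with the top label, the network's raw outputs can be passed through softmax. A short sketch continuing the script above (it reuses model, test_data and classes; the functional import is the only addition):

import torch.nn.functional as F

with torch.no_grad():
    x = torch.unsqueeze(test_data[0][0], dim=0)  # first test image, with batch dim
    probs = F.softmax(model(x)[0], dim=0)  # raw outputs -> class probabilities
    conf, idx = probs.max(dim=0)
    print(f'Predicted: "{classes[idx.item()]}" with {conf.item():.1%} confidence')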