前文我们实现了googlenet。想必大家已经有感觉,实现一个神经网络不是很难嘛。但是实现一个神经网络只是深度学习的入门和开始,如何训练一个网络,并将它应用到实际的工作和需求中,才是我们学习神经网络的初衷。所以今天我们就开始看看怎么使用前文实现的这个神经网络。
googlenet在pytorch里面已经有实现了,同时pytorch还提供了一个使用imagenet 1000分类训练过的权重。所以我们这里直接使用这个预训练模型,然后调整网络结构,将输出变为10分类,使用cifar10数据集重新训练(finetuning)一个属于我们自己的模型。pytorch的googlenet可以从:https://pytorch.org/hub/pytorch_vision_googlenet/ [1] 这里根据示例代码直接下载使用,也可以从 https://github.com/pytorch/vision/blob/master/torchvision/models/googlenet.py [2] 找到源代码。
我们这里使用[2] 提供的googlenet实现,[2]中同时提供了加载imagenet预训练权重的函数,我们进行执行对应的代码加载即可,代码如下:
import warnings
from collections import namedtuple
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    # Fallback import location for the same helper.
    from torch.utils.model_zoo import load_url as load_state_dict_from_url
#from .utils import load_state_dict_from_url
from typing import Optional, Tuple, List, Callable, Any

__all__ = ['GoogLeNet', 'googlenet', "GoogLeNetOutputs", "_GoogLeNetOutputs"]

# Download location of the pretrained checkpoint, keyed by model name.
model_urls = {
    # GoogLeNet ported from TensorFlow
    'googlenet': 'https://download.pytorch.org/models/googlenet-1378be20.pth',
}

# Result container used when the auxiliary heads are active: main logits
# followed by the two auxiliary logits.
GoogLeNetOutputs = namedtuple('GoogLeNetOutputs', ['logits', 'aux_logits2', 'aux_logits1'])
GoogLeNetOutputs.__annotations__ = {'logits': Tensor, 'aux_logits2': Optional[Tensor],
                                    'aux_logits1': Optional[Tensor]}

# Script annotations failed with _GoogleNetOutputs = namedtuple ...
# _GoogLeNetOutputs set here for backwards compat
_GoogLeNetOutputs = GoogLeNetOutputs


def googlenet(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> "GoogLeNet":
    r"""GoogLeNet (Inception v1) model architecture from
    `"Going Deeper with Convolutions" <http://arxiv.org/abs/1409.4842>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        aux_logits (bool): If True, adds two auxiliary branches that can improve training.
            Default: *False* when pretrained is True otherwise *True*
        transform_input (bool): If True, preprocesses the input according to the method with which it
            was trained on ImageNet. Default: *False*; when ``pretrained`` is True and the
            argument is not supplied it is set to *True*.

    Returns:
        A ``GoogLeNet`` instance. With ``pretrained=True`` the downloaded
        state dict has been loaded into it.
    """
    if pretrained:
        if 'transform_input' not in kwargs:
            kwargs['transform_input'] = True
        if 'aux_logits' not in kwargs:
            kwargs['aux_logits'] = False
        if kwargs['aux_logits']:
            warnings.warn('auxiliary heads in the pretrained googlenet model are NOT pretrained, '
                          'so make sure to train them')
        # Remember what the caller asked for: the model is always built WITH
        # aux heads before load_state_dict is called, and the heads are
        # removed again afterwards if they were not requested.
        original_aux_logits = kwargs['aux_logits']
        kwargs['aux_logits'] = True
        # Random initialisation is skipped because the weights are overwritten below.
        kwargs['init_weights'] = False
        model = GoogLeNet(**kwargs)
        state_dict = load_state_dict_from_url(model_urls['googlenet'],
                                              progress=progress)
        model.load_state_dict(state_dict)
        if not original_aux_logits:
            model.aux_logits = False
            model.aux1 = None  # type: ignore[assignment]
            model.aux2 = None  # type: ignore[assignment]
        return model

    return GoogLeNet(**kwargs)


class GoogLeNet(nn.Module):
    """GoogLeNet network: stem convolutions, nine inception blocks, two
    optional auxiliary classifiers and a final linear classifier.

    Args:
        num_classes: Output size of ``fc`` and of the auxiliary heads.
        aux_logits: Build the two auxiliary heads (``aux1``, ``aux2``).
        transform_input: Re-scale the input channels in ``_transform_input``.
        init_weights: Run ``_initialize_weights``; ``None`` emits a
            ``FutureWarning`` and is treated as ``True``.
        blocks: Three factories ``[conv_block, inception_block,
            inception_aux_block]``; defaults to
            ``[BasicConv2d, Inception, InceptionAux]``.
    """
    __constants__ = ['aux_logits', 'transform_input']

    def __init__(
        self,
        num_classes: int = 1000,
        aux_logits: bool = True,
        transform_input: bool = False,
        init_weights: Optional[bool] = None,
        blocks: Optional[List[Callable[..., nn.Module]]] = None
    ) -> None:
        super(GoogLeNet, self).__init__()
        if blocks is None:
            blocks = [BasicConv2d, Inception, InceptionAux]
        if init_weights is None:
            warnings.warn('The default weight initialization of GoogleNet will be changed in future releases of '
                          'torchvision. If you wish to keep the old behavior (which leads to long initialization times'
                          ' due to scipy/scipy#11299), please set init_weights=True.', FutureWarning)
            init_weights = True
        assert len(blocks) == 3
        conv_block = blocks[0]
        inception_block = blocks[1]
        inception_aux_block = blocks[2]

        self.aux_logits = aux_logits
        self.transform_input = transform_input

        # Stem: plain convolutions and max-pooling before the first inception block.
        self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.conv2 = conv_block(64, 64, kernel_size=1)
        self.conv3 = conv_block(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        # Inception stages; the first argument is the incoming channel count,
        # the rest are the per-branch channel counts (see Inception.__init__).
        self.inception3a = inception_block(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = inception_block(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception4a = inception_block(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = inception_block(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = inception_block(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = inception_block(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = inception_block(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.inception5a = inception_block(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = inception_block(832, 384, 192, 384, 48, 128, 128)

        # Auxiliary classifiers are applied after inception4a (512 channels)
        # and inception4d (528 channels) in _forward.
        if aux_logits:
            self.aux1 = inception_aux_block(512, num_classes)
            self.aux2 = inception_aux_block(528, num_classes)
        else:
            self.aux1 = None  # type: ignore[assignment]
            self.aux2 = None  # type: ignore[assignment]

        # Classification head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(1024, num_classes)

        if init_weights:
            self._initialize_weights()

    def _initialize_weights(self) -> None:
        """Initialise Conv2d/Linear weights from a truncated normal
        (bounds -2..2, scale 0.01) and BatchNorm2d to weight 1 / bias 0."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                import scipy.stats as stats
                X = stats.truncnorm(-2, 2, scale=0.01)
                values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype)
                values = values.view(m.weight.size())
                with torch.no_grad():
                    m.weight.copy_(values)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _transform_input(self, x: Tensor) -> Tensor:
        """Re-scale each of the three input channels when ``transform_input`` is set.

        Each channel c becomes ``x_c * (std_c / 0.5) + (mean_c - 0.5) / 0.5``.
        Presumably this converts an input normalised with the ImageNet
        mean/std into one normalised with mean 0.5 / std 0.5 -- confirm
        against the preprocessing actually used.
        """
        if self.transform_input:
            x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
            x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
            x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
            x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
        return x

    def _forward(self, x: Tensor) -> Tuple[Tensor, Optional[Tensor], Optional[Tensor]]:
        """Run the network and return ``(logits, aux2, aux1)``.

        The auxiliary values are ``None`` unless the corresponding head
        exists and the module is in training mode.
        """
        # N x 3 x 224 x 224
        x = self.conv1(x)
        # N x 64 x 112 x 112
        x = self.maxpool1(x)
        # N x 64 x 56 x 56
        x = self.conv2(x)
        # N x 64 x 56 x 56
        x = self.conv3(x)
        # N x 192 x 56 x 56
        x = self.maxpool2(x)

        # N x 192 x 28 x 28
        x = self.inception3a(x)
        # N x 256 x 28 x 28
        x = self.inception3b(x)
        # N x 480 x 28 x 28
        x = self.maxpool3(x)
        # N x 480 x 14 x 14
        x = self.inception4a(x)
        # N x 512 x 14 x 14
        aux1 = torch.jit.annotate(Optional[Tensor], None)
        if self.aux1 is not None:
            if self.training:
                aux1 = self.aux1(x)

        x = self.inception4b(x)
        # N x 512 x 14 x 14
        x = self.inception4c(x)
        # N x 512 x 14 x 14
        x = self.inception4d(x)
        # N x 528 x 14 x 14
        aux2 = torch.jit.annotate(Optional[Tensor], None)
        if self.aux2 is not None:
            if self.training:
                aux2 = self.aux2(x)

        x = self.inception4e(x)
        # N x 832 x 14 x 14
        x = self.maxpool4(x)
        # N x 832 x 7 x 7
        x = self.inception5a(x)
        # N x 832 x 7 x 7
        x = self.inception5b(x)
        # N x 1024 x 7 x 7

        x = self.avgpool(x)
        # N x 1024 x 1 x 1
        x = torch.flatten(x, 1)
        # N x 1024
        x = self.dropout(x)
        x = self.fc(x)
        # N x 1000 (num_classes)
        return x, aux2, aux1

    @torch.jit.unused
    def eager_outputs(self, x: Tensor, aux2: Tensor, aux1: Optional[Tensor]) -> GoogLeNetOutputs:
        """Eager-mode result: the named tuple while training with aux heads,
        otherwise just the main logits tensor."""
        if self.training and self.aux_logits:
            return _GoogLeNetOutputs(x, aux2, aux1)
        else:
            return x   # type: ignore[return-value]

    def forward(self, x: Tensor) -> GoogLeNetOutputs:
        """Apply the optional input transform and run the network.

        Returns the main logits tensor in eager mode unless the module is
        training with ``aux_logits`` enabled, in which case a
        ``GoogLeNetOutputs`` tuple is returned.
        """
        x = self._transform_input(x)
        # NOTE(review): _forward returns (x, aux2, aux1) but is unpacked here
        # as (x, aux1, aux2), so the local names are swapped relative to the
        # heads that produced them; as a result the `aux_logits2` field ends
        # up holding the output of self.aux1 and `aux_logits1` that of
        # self.aux2. Confirm whether callers rely on the field names.
        x, aux1, aux2 = self._forward(x)
        aux_defined = self.training and self.aux_logits
        if torch.jit.is_scripting():
            if not aux_defined:
                warnings.warn("Scripted GoogleNet always returns GoogleNetOutputs Tuple")
            return GoogLeNetOutputs(x, aux2, aux1)
        else:
            return self.eager_outputs(x, aux2, aux1)


class Inception(nn.Module):
    """Inception block: four parallel branches over the same input whose
    outputs are concatenated along the channel dimension.

    Args:
        in_channels: Channels of the incoming feature map.
        ch1x1: Output channels of branch1 (1x1 conv).
        ch3x3red / ch3x3: Channels of the 1x1 reduction and of the 3x3 conv in branch2.
        ch5x5red / ch5x5: Channels of the 1x1 reduction and of the second conv in branch3.
        pool_proj: Output channels of the 1x1 conv after max-pooling in branch4.
        conv_block: Factory for the conv units; defaults to ``BasicConv2d``.
    """

    def __init__(
        self,
        in_channels: int,
        ch1x1: int,
        ch3x3red: int,
        ch3x3: int,
        ch5x5red: int,
        ch5x5: int,
        pool_proj: int,
        conv_block: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(Inception, self).__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1)

        self.branch2 = nn.Sequential(
            conv_block(in_channels, ch3x3red, kernel_size=1),
            conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1)
        )

        self.branch3 = nn.Sequential(
            conv_block(in_channels, ch5x5red, kernel_size=1),
            # Here, kernel_size=3 instead of kernel_size=5 is a known bug.
            # Please see https://github.com/pytorch/vision/issues/906 for details.
            conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1)
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            conv_block(in_channels, pool_proj, kernel_size=1)
        )

    def _forward(self, x: Tensor) -> List[Tensor]:
        """Return the four branch outputs as a list (not yet concatenated)."""
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)

        outputs = [branch1, branch2, branch3, branch4]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        """Concatenate the branch outputs along dimension 1 (channels)."""
        outputs = self._forward(x)
        return torch.cat(outputs, 1)


class InceptionAux(nn.Module):
    """Auxiliary classifier head: adaptive average pool to 4x4, 1x1 conv to
    128 channels, then two linear layers (2048 -> 1024 -> num_classes)."""

    def __init__(
        self,
        in_channels: int,
        num_classes: int,
        conv_block: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(InceptionAux, self).__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.conv = conv_block(in_channels, 128, kernel_size=1)

        # 2048 = 128 channels * 4 * 4 after pooling and flattening in forward().
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x: Tensor) -> Tensor:
        """Return the auxiliary logits for feature map ``x``."""
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = F.adaptive_avg_pool2d(x, (4, 4))
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        x = self.conv(x)
        # N x 128 x 4 x 4
        x = torch.flatten(x, 1)
        # N x 2048
        x = F.relu(self.fc1(x), inplace=True)
        # N x 1024
        x = F.dropout(x, 0.7, training=self.training)
        # N x 1024
        x = self.fc2(x)
        # N x 1000 (num_classes)

        return x


class BasicConv2d(nn.Module):
    """Conv2d (no bias) followed by BatchNorm2d (eps=0.001) and ReLU.

    Extra keyword arguments are forwarded to ``nn.Conv2d``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        **kwargs: Any
    ) -> None:
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x: Tensor) -> Tensor:
        """Apply conv -> batch norm -> in-place ReLU."""
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x, inplace=True)


# Build the ImageNet-pretrained model (this downloads the checkpoint).
model = googlenet(True)
# First parameter tensor of the model; not used further in this listing.
p1 = next(model.parameters())
# change the fc layers to fit cifar10 dataset.
# The new layer is freshly initialised; 1024 matches the in_features of the
# original fc layer defined in GoogLeNet.__init__.
model.fc = nn.Linear(1024, 10)
注意到原始代码中的 load_state_dict_from_url 是通过utils.py间接import进来的,我们这里直接将其import到当前代码中。另外对于InceptionAux模块以及对应的aux head,在进行finetuning的时候需要进行重新训练(pytorch提供的预训练模型中,aux head的权重并未经过预训练;我们这里的实现里面并没有finetuning aux head,aux head和分类head的finetuning原理是一样的)。
我们使用googlenet函数加载预训练模型之后,直接替换model的fc层 (见行326)。这样模型中其他层都是预训练的,可以用来做特征抽取使用(暂且称为特征抽取层)。我们的finetuning这里主要就是要修改更新fc这一层的权重(这里称为分类层或者分类head)。
值得注意的是,我们可以在使用自己的数据集重新训练的过程中,不更新特征抽取层权重。这种情况下,训练只会调整分类头。这对于数据集与pretrain数据集类似的情况(或者数据集很小不容易训练)比较合适。相反,我们可以选择训练过程中更新特征抽取层权重(极端情况就是完全重新训练),这对于数据集较大且和pretrain数据集区别较大的情况比较合适。
我们这里实现的是 更新特征抽取层的 finetuning(pytorch中, 要禁止某层的权重参数更新只需要将对应属性置为false即可: param.requires_grad = False)
有了加载了pretrain权重并且修改了分类head的模型,我们这里准备下训练数据。这里我们选择cifar10 数据集:http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
这个数据集里面的数据解压完了是一些二进制数据,所以需要进行处理一下才能使用,如下代码:
如代码所示:
import os
import pickle

import numpy as np
import torch
from torchvision import transforms
from PIL import Image


def unpickle(file):
    """Load one pickled CIFAR-10 file and return the decoded object.

    Args:
        file: Path of the file to read.

    Returns:
        The unpickled object; keys of the CIFAR-10 dicts are ``bytes``
        because the file is decoded with ``encoding='bytes'``.
    """
    # NOTE(security): pickle can execute arbitrary code while loading.
    # Only use this on an archive obtained from a trusted source.
    with open(file, 'rb') as fo:
        data = pickle.load(fo, encoding='bytes')
    return data


def _to_channels_last(flat_images, negatives):
    """Reshape flat rows to (N, 3, 32, 32) and move channels last -> (N, 32, 32, 3)."""
    images = flat_images.reshape((len(flat_images), 3, 32, 32)).transpose(0, 2, 3, 1)
    if negatives:
        images = images.astype(np.float32)
    return images


def load_cifar_10_data(data_dir, negatives=False):
    """Read the extracted CIFAR-10 python archive from ``data_dir``.

    Args:
        data_dir: Directory containing ``batches.meta``, ``data_batch_1`` ..
            ``data_batch_5`` and ``test_batch``.
        negatives: When true the image arrays are converted to ``float32``;
            otherwise the dtype stored in the files is kept.

    Returns:
        A 7-tuple ``(train_data, train_filenames, train_labels, test_data,
        test_filenames, test_labels, label_names)``. The image arrays have
        shape ``(N, 32, 32, 3)``; everything else is a NumPy array built from
        the lists stored in the files.
    """
    meta_data_dict = unpickle(os.path.join(data_dir, "batches.meta"))
    cifar_label_names = np.array(meta_data_dict[b'label_names'])

    # Training data: five batches, each a dict with b'data', b'filenames'
    # and b'labels'. Collect first and stack once instead of re-stacking the
    # growing array on every iteration.
    train_chunks = []
    cifar_train_filenames = []
    cifar_train_labels = []
    for i in range(1, 6):
        batch = unpickle(os.path.join(data_dir, "data_batch_{}".format(i)))
        train_chunks.append(batch[b'data'])
        cifar_train_filenames += batch[b'filenames']
        cifar_train_labels += batch[b'labels']
    cifar_train_data = _to_channels_last(np.vstack(train_chunks), negatives)
    cifar_train_filenames = np.array(cifar_train_filenames)
    cifar_train_labels = np.array(cifar_train_labels)

    # Test data: a single batch with the same keys.
    test_batch = unpickle(os.path.join(data_dir, "test_batch"))
    cifar_test_data = _to_channels_last(test_batch[b'data'], negatives)
    cifar_test_filenames = np.array(test_batch[b'filenames'])
    cifar_test_labels = np.array(test_batch[b'labels'])

    return cifar_train_data, cifar_train_filenames, cifar_train_labels, \
        cifar_test_data, cifar_test_filenames, cifar_test_labels, cifar_label_names


# `fpath` is the directory of the extracted archive; it is not defined in
# this listing and has to be set before this line runs.
train_data, train_filenames, train_labels, test_data, test_filenames, test_labels, label_names\
    = load_cifar_10_data(fpath)


def make_data_loader(data, labels, max_samples=100, train_factor=0.8):
    """Turn raw images/labels into ``{"train": [...], "val": [...]}``.

    Every entry of the returned lists is a pair ``(image, label)`` where
    ``image`` is a float tensor of shape ``(1, 3, 224, 224)`` and ``label``
    an ``int64`` tensor of shape ``(1,)``, i.e. each pair is a batch of one.

    Args:
        data: Image array indexed by sample; each item is passed to
            ``Image.fromarray``.
        labels: Class ids aligned with ``data``.
        max_samples: Only the first ``max_samples`` items are used
            (``None`` uses everything). Defaults to 100 to keep the demo fast.
        train_factor: Fraction of the used samples that goes to "train";
            the remainder goes to "val".

    Returns:
        Dict with the keys ``"train"`` and ``"val"``.
    """
    print(data.shape)
    print(type(data))

    # Resize/crop to 224x224, convert to tensor, then normalise with the
    # listed mean/std.
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    def make_tensor(image_array):
        # unsqueeze(0) adds the batch dimension the model expects.
        return preprocess(Image.fromarray(image_array)).unsqueeze(0)

    def make_label(class_id):
        # Class index (not one-hot); dtype is forced to int64 regardless of
        # the platform's default NumPy integer width.
        return torch.tensor([int(class_id)], dtype=torch.long)

    if max_samples is not None:
        data = data[:max_samples]
        labels = labels[:max_samples]

    # The split point is computed from the samples actually used, and the
    # validation part is the *remainder* so it does not overlap the training part.
    train_num = int(len(data) * train_factor)
    samples = [(make_tensor(image), make_label(label))
               for image, label in zip(data, labels)]

    return {"train": samples[:train_num], "val": samples[train_num:]}


loader = make_data_loader(train_data, train_labels)
load_cifar_10_data 函数首先对解压后的目录进行处理,区分出训练数据,测试数据,以及对应的标签数据。
make_data_loader 将数据划分为训练集和验证集,并把图像数据和标签改为训练使用的格式,最后返回一个loader字典。
下面是对加载了pretrain权重的googlenet 进行finetune训练的核心逻辑
import time
import copy

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def _forward_and_loss(model, inputs, labels, criterion, use_aux, aux_weight=0.4):
    """Run ``model`` on ``inputs`` and return ``(main_logits, loss)``.

    A model that returns a tuple is treated as ``(main, *auxiliary)``.
    With ``use_aux`` every non-``None`` auxiliary output adds
    ``aux_weight * criterion(aux, labels)`` to the loss; without it only the
    main output is used.
    """
    result = model(inputs)
    if not isinstance(result, tuple):
        return result, criterion(result, labels)

    outputs, *aux_outputs = result
    loss = criterion(outputs, labels)
    if use_aux:
        for aux in aux_outputs:
            if aux is not None:
                loss = loss + aux_weight * criterion(aux, labels)
    return outputs, loss


def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Fine-tune ``model`` and keep the weights with the best validation accuracy.

    Args:
        model: Network to train; it must already live on ``device``.
        dataloaders: Mapping with the keys ``'train'`` and ``'val'``; each
            value yields ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the parameters to update.
        num_epochs: Number of passes over both phases.
        is_inception: When true, auxiliary outputs returned by the model in
            the training phase contribute to the loss with weight 0.4.

    Returns:
        ``(model, val_acc_history)``: the model with the best validation
        weights loaded, and the per-epoch validation accuracies.
    """
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    print("begin to train")
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            num_samples = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_train):
                    outputs, loss = _forward_and_loss(
                        model, inputs, labels, criterion,
                        use_aux=is_inception and is_train)

                    _, preds = torch.max(outputs, 1)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # criterion averages over the batch, so weight by batch size
                # to obtain a per-sample sum.
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels)
                num_samples += batch_size

            # Normalise by the number of samples seen; len(dataloader) is the
            # number of batches and is only equal to it for batch size 1.
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects.double() / num_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Snapshot the weights whenever validation accuracy improves.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history


criterion = nn.CrossEntropyLoss()
# The batches are moved to `device` inside train_model, so the model has to
# be there as well; move it before the optimizer is constructed.
model = model.to(device)
# All parameters are updated (feature layers included), not only the new fc head.
params_to_update = model.parameters()
print(type(params_to_update))
optimizer_ft = torch.optim.SGD(params_to_update, lr=0.001, momentum=0.9)
train_model(model, loader, criterion, optimizer_ft)
train_model函数主要逻辑也比较简单:
数据分为train 和eval两块
train阶段模型可以更新权重(进行反向传播loss.backward和权重更新optimizer.step)
eval 阶段只进行前向传播和计算准确率
整体就是pytorch训练的一般套路:
1 数据拷入device ; inputs = inputs.to(device)
2 重置梯度; optimizer.zero_grad()
3 前向传播; outputs = model(inputs)
4 计算损失;loss = criterion(outputs, labels)
5 损失反向传播; loss.backward()
6 更新权重 optimizer.step()
损失函数我们使用交叉熵损失。优化器使用SGD,动量为0.9
我们这里只使用了训练数据集中前100个数据进行训练。可以看到,到了第18个epoch,train loss下降到了1.45,val loss却下降不明显,有可能是过拟合,也有可能是数据集太少。我们这里只是示意训练,后续再详细调优。
<class 'generator'> begin to train Epoch 0/24 ---------- train Loss: 2.6431 Acc: 0.0900 val Loss: 2.3888 Acc: 0.1200 Epoch 1/24 ---------- train Loss: 2.6619 Acc: 0.1100 val Loss: 2.3855 Acc: 0.1300 Epoch 2/24 ---------- train Loss: 2.5731 Acc: 0.1100 val Loss: 2.4109 Acc: 0.0900 Epoch 3/24 ---------- train Loss: 2.5192 Acc: 0.1000 val Loss: 2.3500 Acc: 0.1200 Epoch 4/24 ---------- train Loss: 2.4641 Acc: 0.0900 val Loss: 2.3360 Acc: 0.1000 . . . Epoch 17/24 ---------- train Loss: 1.4424 Acc: 0.4700 val Loss: 2.3126 Acc: 0.2100 Epoch 18/24 ---------- train Loss: 1.4533 Acc: 0.5100 val Loss: 2.3580 Acc: 0.2300
原创声明,本文系作者授权云+社区发表,未经许可,不得转载。
如有侵权,请联系 yunjia_community@tencent.com 删除。
我来说两句