# 手把手教你由TensorFlow上手PyTorch（附代码）

PyTorch 的易用性如何？Andrej Karpathy 是这样评价的

```python
import torch
import numpy as np

numpy_tensor = np.random.randn(10, 20)

# convert numpy array to pytorch array
pytorch_tensor = torch.Tensor(numpy_tensor)
# or another way
pytorch_tensor = torch.from_numpy(numpy_tensor)

# convert torch tensor to numpy representation
pytorch_tensor.numpy()

# if we want to use the tensor on GPU provide another type
dtype = torch.cuda.FloatTensor
gpu_tensor = torch.randn(10, 20).type(dtype)
# or just call the `cuda()` method
gpu_tensor = pytorch_tensor.cuda()
# call back to the CPU
cpu_tensor = gpu_tensor.cpu()

# define pytorch tensors
x = torch.randn(10, 20)
y = torch.ones(20, 5)

# `@` means matrix multiplication from python3.5, PEP-0465
res = x @ y

# get the shape
res.shape  # torch.Size([10, 5])
```

```python
import torch
from torch.autograd import Variable

# define the inputs
x_tensor = torch.randn(10, 20)
y_tensor = torch.randn(10, 5)
x = Variable(x_tensor, requires_grad=False)
y = Variable(y_tensor, requires_grad=False)

# define some weights
w = Variable(torch.randn(20, 5), requires_grad=True)

# get the variable's tensor
print(type(w.data))  # torch.FloatTensor
# get the variable's gradient
print(w.grad)  # None

loss = torch.mean((y - x @ w) ** 2)

# calculate the gradients
loss.backward()
print(w.grad)  # some gradients

# manually apply gradients
w.data -= 0.01 * w.grad.data
# manually zero gradients after the update
w.grad.data.zero_()
```

```python
import torch
from torch.autograd import Variable
import torch.nn.functional as F

x = Variable(torch.randn(10, 20), requires_grad=False)
y = Variable(torch.randn(10, 3), requires_grad=False)

# define some weights
w1 = Variable(torch.randn(20, 5), requires_grad=True)
w2 = Variable(torch.randn(5, 3), requires_grad=True)

learning_rate = 0.1
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD([w1, w2], lr=learning_rate)

for step in range(5):
    pred = F.sigmoid(x @ w1)
    pred = F.sigmoid(pred @ w2)
    loss = loss_fn(pred, y)

    # manually zero all previous gradients
    optimizer.zero_grad()
    # calculate new gradients
    loss.backward()
    # apply new gradients
    optimizer.step()
```

PyTorch 和 TensorFlow 的另一个主要区别在于其不同的计算图表现形式。TensorFlow 使用静态图，这意味着我们是先定义，然后不断使用它。在 PyTorch 中，每次正向传播都会定义一个新计算图。在开始阶段，两者之间或许差别不是很大，但动态图会在你希望调试代码，或定义一些条件语句时显现出自己的优势。就像你可以使用自己最喜欢的 debugger 一样！

```python
import tensorflow as tf

first_counter = tf.constant(0)
second_counter = tf.constant(10)
some_value = tf.Variable(15)


# the condition should handle all args:
def cond(first_counter, second_counter, *args):
    return first_counter < second_counter


def body(first_counter, second_counter, some_value):
    first_counter = tf.add(first_counter, 2)
    second_counter = tf.add(second_counter, 1)
    return first_counter, second_counter, some_value


c1, c2, val = tf.while_loop(
    cond, body, [first_counter, second_counter, some_value])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    counter_1_res, counter_2_res = sess.run([c1, c2])
```
```python
import torch

first_counter = torch.Tensor([0])
second_counter = torch.Tensor([10])
# NOTE: the original snippet had `torch.Tensor(15)`, which creates an
# UNINITIALIZED tensor with 15 elements; `[15]` creates the value 15.
some_value = torch.Tensor([15])

# in PyTorch this is just a plain Python loop — no graph-building API needed
while (first_counter < second_counter)[0]:
    first_counter += 2
    second_counter += 1
```

```python
from collections import OrderedDict
import torch.nn as nn

# Example of using Sequential
model = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU()
)

# Example of using Sequential with OrderedDict (named layers)
model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU())
]))

# `some_input` is assumed to be defined elsewhere
output = model(some_input)
```

```python
from torch import nn


class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 12, kernel_size=3, padding=1, stride=1),
            nn.Conv2d(12, 24, kernel_size=3, padding=1, stride=1),
        )
        self.second_extractor = nn.Conv2d(
            24, 36, kernel_size=3, padding=1, stride=1)

    def forward(self, x):
        x = self.feature_extractor(x)
        x = self.second_extractor(x)
        # note that we may call the same layer twice or more
        x = self.second_extractor(x)
        return x
```

```python
import torch


class MyFunction(torch.autograd.Function):

    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        output = torch.sign(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        # saved tensors - a tuple of tensors, so we need to get the first
        input, = ctx.saved_variables
        # FIX: clone before modifying — mutating `grad_output` in place can
        # corrupt gradients that autograd still needs elsewhere in the graph
        grad_input = grad_output.clone()
        grad_input[input.ge(1)] = 0
        grad_input[input.le(-1)] = 0
        return grad_input


# usage
x = torch.randn(10, 20)
y = MyFunction.apply(x)
# or
my_func = MyFunction.apply
y = my_func(x)


# and if we want to use it inside nn.Module
class MyFunctionModule(torch.nn.Module):
    def forward(self, x):
        return MyFunction.apply(x)
```

```python
import torch

### tensor example
x_cpu = torch.randn(10, 20)
w_cpu = torch.randn(20, 10)
# direct transfer to the GPU
x_gpu = x_cpu.cuda()
w_gpu = w_cpu.cuda()
result_gpu = x_gpu @ w_gpu
# get back from GPU to CPU
result_cpu = result_gpu.cpu()

### model example
model = model.cuda()
# train step
inputs = Variable(inputs.cuda())
outputs = model(inputs)
# get back from GPU to CPU
outputs = outputs.cpu()
```

```python
class Trainer:
    """Wraps a model and moves data to/from the GPU only when enabled."""

    def __init__(self, model, use_cuda=False, gpu_idx=0):
        self.use_cuda = use_cuda
        self.gpu_idx = gpu_idx
        self.model = self.to_gpu(model)

    def to_gpu(self, tensor):
        if self.use_cuda:
            return tensor.cuda(self.gpu_idx)
        else:
            return tensor

    def from_gpu(self, tensor):
        if self.use_cuda:
            return tensor.cpu()
        else:
            return tensor

    def train(self, inputs):
        inputs = self.to_gpu(inputs)
        outputs = self.model(inputs)
        outputs = self.from_gpu(outputs)
```

```python
import math  # needed by MyModel below; missing in the original snippet
import torch
import torch.nn as nn  # needed below; missing in the original snippet
from torch.autograd import Variable

# new way with the `init` module
w = torch.Tensor(3, 5)
torch.nn.init.normal(w)
# works for Variables also
w2 = Variable(w)
torch.nn.init.normal(w2)
# old-styled direct access to the tensor's data attribute
w2.data.normal_()


# example for some module
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# for-loop approach with direct access
class MyModel(nn.Module):
    def __init__(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()
```

```python
import torch
from torch.autograd import Variable

# requires_grad
# If there's a single input to an operation that requires gradient,
# its output will also require gradient.
x = Variable(torch.randn(5, 5))
y = Variable(torch.randn(5, 5))
z = Variable(torch.randn(5, 5), requires_grad=True)
a = x + y
a.requires_grad  # False
b = a + z
b.requires_grad  # True

# Volatile differs from requires_grad in how the flag propagates.
# If there's even a single volatile input to an operation,
# its output is also going to be volatile.
x = Variable(torch.randn(5, 5), requires_grad=True)
y = Variable(torch.randn(5, 5), volatile=True)
a = x + y
a.requires_grad  # False
```

```python
# scheduler example
from torch.optim import lr_scheduler

optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
for epoch in range(100):
    scheduler.step()
    train()
    validate()

# The train flag can be updated with a boolean
# to disable dropout and batch-norm learning
model.train(True)
# execute train step
model.train(False)
# run inference step

# CPU seed
torch.manual_seed(42)
# GPU seed
torch.cuda.manual_seed_all(42)
```

```python
from collections import OrderedDict
import torch.nn as nn

model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU())
]))
print(model)
# Sequential (
#   (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
#   (relu1): ReLU ()
#   (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
#   (relu2): ReLU ()
# )

# save/load only the model parameters (preferred solution)
torch.save(model.state_dict(), save_path)
model.load_state_dict(torch.load(save_path))

# save the whole model
torch.save(model, save_path)
model = torch.load(save_path)
```

• logger：https://github.com/oval-group/logger
• Crayon：https://github.com/torrvision/crayon
• tensorboard_logger：https://github.com/TeamHG-Memex/tensorboard_logger
• tensorboard-pytorch：https://github.com/lanpa/tensorboard-pytorch
• Visdom：https://github.com/facebookresearch/visdom

PyTorch 开发者不希望重新发明轮子，而是直接借鉴了 Python 的多进程（multiprocessing）机制来并行加载数据。为了构建自己的数据加载器，你可以继承 torch.utils.data.Dataset 类，并重写其中的几个方法：

```python
import torch
import torchvision as tv
import pandas as pd  # needed for `pd.read_csv` below; missing in the original snippet


class ImagesDataset(torch.utils.data.Dataset):
    def __init__(self, df, transform=None,
                 loader=tv.datasets.folder.default_loader):
        self.df = df
        self.transform = transform
        self.loader = loader

    def __getitem__(self, index):
        row = self.df.iloc[index]
        target = row['class_']
        path = row['path']
        img = self.loader(path)
        if self.transform is not None:
            img = self.transform(img)
        return img, target

    def __len__(self):
        n, _ = self.df.shape
        return n


# what transformations should be done with our images
data_transforms = tv.transforms.Compose([
    tv.transforms.RandomCrop((64, 64), padding=4),
    tv.transforms.RandomHorizontalFlip(),
    tv.transforms.ToTensor(),
])

train_df = pd.read_csv('path/to/some.csv')

# initialize our dataset at first
train_dataset = ImagesDataset(
    df=train_df,
    transform=data_transforms
)

# initialize the data loader with the required number of workers and other params
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=10,
                                           shuffle=True,
                                           num_workers=16)

# fetch the batch (calls the `__getitem__` method)
for img, target in train_loader:
    pass
```

• PyTorch 的图像维度顺序和 TensorFlow 的不同，前者采用 [batch_size × channels × height × width] 的形式。你可以通过预处理步骤 torchvision.transforms.ToTensor() 来完成这种转换。在 transforms 包中还有很多有用的小工具。
• 你很可能会使用固定内存（pinned memory）向 GPU 传输数据。对此，你只需要在调用 cuda() 时额外传入标志 async=True（新版 PyTorch 中已更名为 non_blocking=True），并从设置了 pin_memory=True 的 DataLoader 中获取位于固定内存中的批次数据。

```python
class ImagesDataset(torch.utils.data.Dataset):
    pass


class Net(nn.Module):
    pass


model = Net()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
criterion = torch.nn.MSELoss()

dataset = ImagesDataset(path_to_images)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=10)

train = True
for epoch in range(epochs):
    if train:
        # FIX: the original called `lr_scheduler.step()` (the module);
        # it is the scheduler *object* that must be stepped
        scheduler.step()
    for inputs, labels in data_loader:
        inputs = Variable(to_gpu(inputs))
        labels = Variable(to_gpu(labels))

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    if not train:
        save_best_model(epoch_validation_accuracy)
```

• 它可以用来代替 Numpy
• 它的原型设计非常快
• 调试和使用条件流非常简单
• 有很多方便且开箱即用的工具

PyTorch 是一个正在快速发展的框架，背靠一个富有活力的社区。现在是尝试 PyTorch 的好时机。

561 篇文章105 人订阅

0 条评论

## 相关文章

2199

2137

49010

### 绘图: matplotlib核心剖析

matplotlib是基于Python语言的开源项目，旨在为Python提供一个数据绘图包。我将在这篇文章中介绍matplotlib API的核心对象，并介绍如...

2327

1103

1463

4167

1173

### 目前深度学习最强框架——PyTorch

PyTorch由于使用了强大的GPU加速的Tensor计算（类似numpy）和基于磁带的自动系统的深度神经网络。这使得今年一月份被开源的PyTorch成为了深度...

5575

3981