# 教程 | 如何从TensorFlow转入PyTorch

PyTorch 的易用性如何？Andrej Karpathy 是这样评价的

import torch
import numpy as np

numpy_tensor = np.random.randn(10, 20)

# convert numpy array to pytorch tensor
# NOTE: torch.Tensor(...) copies and silently casts float64 -> float32
pytorch_tensor = torch.Tensor(numpy_tensor)
# or another way: from_numpy keeps the dtype (float64 here) and shares memory
pytorch_tensor = torch.from_numpy(numpy_tensor)

# convert torch tensor back to a numpy representation
# FIX: the original discarded this result; keep it so the round trip is visible
numpy_again = pytorch_tensor.numpy()

# If we want a tensor on the GPU, one option is to allocate with a CUDA type.
# NOTE(review): every line below needs a CUDA-capable build and device and
# will raise otherwise; cannot be exercised on a CPU-only machine.
dtype = torch.cuda.FloatTensor
gpu_tensor = torch.randn(10, 20).type(dtype)
# or just call the `cuda()` method on an existing CPU tensor
gpu_tensor = pytorch_tensor.cuda()
# `cpu()` copies the data back to host memory
cpu_tensor = gpu_tensor.cpu()

# allocate two pytorch tensors to multiply
x = torch.randn(10, 20)
y = torch.ones(20, 5)
# the `@` operator (Python 3.5+, PEP 465) is sugar for torch.matmul;
# both spellings produce the identical result
res = torch.matmul(x, y)

# inspect the result's shape
res.shape  # torch.Size([10, 5])

import torch

# define an inputs
x_tensor = torch.randn(10, 20)
y_tensor = torch.randn(10, 5)
# define some weights
# NOTE(review): the actual weight definition was lost when this article was
# scraped — presumably something like `w = Variable(torch.randn(20, 5),
# requires_grad=True)` (pre-0.4 API). `w`, `x` and `y` used below are
# otherwise undefined in this fragment.

# get variable tensor
print(type(w.data))  # torch.FloatTensor

# mean-squared-error loss between targets and the linear prediction x @ w
loss = torch.mean((y - x @ w) ** 2)

# backward() computes gradients for leaf tensors that require grad
loss.backward()
# manually zero gradients after update
# NOTE(review): the zeroing code itself (e.g. `w.grad.data.zero_()`) is
# also missing from this fragment.

import torch
import torch.nn.functional as F  # kept from the original snippet

# define inputs and targets (shapes chosen to match the weights below)
# FIX: the input/weight definitions were lost by the scrape; leaf tensors
# with requires_grad=True let the optimizer update them directly
x = torch.randn(10, 20)
y = torch.randn(10, 3)
# define some weights
w1 = torch.randn(20, 5, requires_grad=True)
w2 = torch.randn(5, 3, requires_grad=True)

learning_rate = 0.1
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD([w1, w2], lr=learning_rate)
for step in range(5):
    # two-layer forward pass (torch.sigmoid — F.sigmoid is deprecated)
    pred = torch.sigmoid(x @ w1)
    pred = torch.sigmoid(pred @ w2)
    loss = loss_fn(pred, y)

    # manually zero all previous gradients
    # FIX: this call was missing in the scraped snippet even though the
    # comment announced it; without it gradients accumulate across steps
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

PyTorch 和 TensorFlow 的另一个主要区别在于其不同的计算图表现形式。TensorFlow 使用静态图，这意味着我们是先定义，然后不断使用它。在 PyTorch 中，每次正向传播都会定义一个新计算图。在开始阶段，两者之间或许差别不是很大，但动态图会在你希望调试代码，或定义一些条件语句时显现出自己的优势。就像你可以使用自己最喜欢的 debugger 一样！

import tensorflow as tf

first_counter = tf.constant(0)
second_counter = tf.constant(10)
some_value = tf.Variable(15)


# condition should handle all args:
def cond(first_counter, second_counter, *args):
    """Keep looping while the first counter is still behind the second."""
    return first_counter < second_counter


def body(first_counter, second_counter, some_value):
    """One loop step.

    FIX: the scraped snippet returned the arguments unchanged, which would
    make tf.while_loop spin forever; the counters must advance, mirroring
    the dynamic-graph PyTorch version below (first += 2, second += 1).
    """
    return first_counter + 2, second_counter + 1, some_value


c1, c2, val = tf.while_loop(
    cond, body, [first_counter, second_counter, some_value])

# graph-mode (TF 1.x) execution: build first, then run inside a Session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    counter_1_res, counter_2_res = sess.run([c1, c2])
import torch

first_counter = torch.Tensor([0])
second_counter = torch.Tensor([10])
# FIX: torch.Tensor(15) allocates an *uninitialized* 15-element tensor;
# a scalar value must be wrapped in a list to get a 1-element tensor
some_value = torch.Tensor([15])

# dynamic graph: an ordinary Python while-loop replaces tf.while_loop
while (first_counter < second_counter)[0]:
    first_counter += 2
    second_counter += 1

from collections import OrderedDict

import torch.nn as nn

# Example of using Sequential
model = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU()
)

# Example of using Sequential with OrderedDict (gives each submodule a name)
model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU())
]))

# FIX: `some_input` was undefined in the original snippet; a dummy batch of
# one 1-channel 32x32 image matches the Conv2d(1, 20, 5) entry layer
some_input = torch.randn(1, 1, 32, 32)
output = model(some_input)

from torch import nn

class Model(nn.Module):
    """Custom module: a Sequential feature extractor followed by a conv layer
    that is deliberately applied twice in forward()."""

    def __init__(self):
        super().__init__()
        # NOTE(review): the scraped article truncated both constructor calls;
        # the layer stack is reconstructed with shape-preserving convs
        # (kernel 3, padding 1, stride 1) so forward() is well-defined.
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 12, kernel_size=3, padding=1, stride=1),
            nn.Conv2d(12, 24, kernel_size=3, padding=1, stride=1),
        )
        # matching in/out channels (24 -> 24) so this layer can be applied
        # twice in a row below
        self.second_extractor = nn.Conv2d(
            24, 24, kernel_size=3, padding=1, stride=1)

    def forward(self, x):
        x = self.feature_extractor(x)
        x = self.second_extractor(x)
        # note that we may call the same layer twice or more
        x = self.second_extractor(x)
        return x

import torch

class MyFunction(torch.autograd.Function):
    """Custom autograd op: sign() in the forward pass, with the incoming
    gradient zeroed wherever |input| >= 1 in the backward pass.

    NOTE(review): the scrape dropped the class header and the whole
    `backward` definition; reconstructed here. `ctx.saved_variables` was
    also replaced with the current `ctx.saved_tensors` API.
    """

    @staticmethod
    def forward(ctx, input):
        # stash the input so backward() can inspect it
        ctx.save_for_backward(input)
        output = torch.sign(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        # saved tensors - tuple of tensors, so we need to get the first
        input, = ctx.saved_tensors
        # FIX: clone before mutating — autograd may reuse grad_output
        grad_output = grad_output.clone()
        grad_output[input.ge(1)] = 0
        grad_output[input.le(-1)] = 0
        return grad_output

# usage
x = torch.randn(10, 20)
y = MyFunction.apply(x)
# or
my_func = MyFunction.apply
y = my_func(x)

# and if we want to use it inside nn.Module
class MyFunctionModule(torch.nn.Module):
    def forward(self, x):
        return MyFunction.apply(x)

import torch

### tensor example
x_cpu = torch.randn(10, 20)
w_cpu = torch.randn(20, 10)
# direct transfer to the GPU (requires a CUDA device; raises otherwise)
x_gpu = x_cpu.cuda()
w_gpu = w_cpu.cuda()
result_gpu = x_gpu @ w_gpu
# get back from GPU to CPU
result_cpu = result_gpu.cpu()

### model example
# NOTE(review): `model`, `inputs` and `Variable` are undefined in this
# fragment — the scrape lost their definitions (`Variable` came from
# torch.autograd in the pre-0.4 API and is no longer needed today).
model = model.cuda()
# train step: move the batch to the GPU before the forward pass
inputs = Variable(inputs.cuda())
outputs = model(inputs)
# get back from GPU to CPU
outputs = outputs.cpu()

class Trainer:
    """Helper that shuttles a model and its tensors on/off a GPU.

    With use_cuda=False every transfer helper is an identity function, so
    the same code path works on CPU-only machines.
    """

    def __init__(self, model, use_cuda=False, gpu_idx=0):
        self.use_cuda = use_cuda
        self.gpu_idx = gpu_idx
        # move the model once, up front
        self.model = self.to_gpu(model)

    def to_gpu(self, tensor):
        """Move `tensor` (or module) to the configured GPU, if enabled."""
        if self.use_cuda:
            return tensor.cuda(self.gpu_idx)
        else:
            return tensor

    def from_gpu(self, tensor):
        """Bring `tensor` back to the CPU, if CUDA was used."""
        if self.use_cuda:
            return tensor.cpu()
        else:
            return tensor

    def train(self, inputs):
        """Run one forward pass on `inputs` and return the (CPU) outputs.

        FIX: the original computed `outputs` and silently dropped them;
        returning the result is backward-compatible and makes the method
        actually usable.
        """
        inputs = self.to_gpu(inputs)
        outputs = self.model(inputs)
        outputs = self.from_gpu(outputs)
        return outputs

import torch

# new way with the `init` module
w = torch.Tensor(3, 5)
# FIX: torch.nn.init.normal was deprecated (and later removed); the
# in-place variant with the trailing underscore is the current API
torch.nn.init.normal_(w)
# Variables were merged into tensors in PyTorch 0.4, so the old
# `w2 = Variable(w)` wrapper (which shared w's storage) is now just
# another reference to the same tensor
w2 = w
torch.nn.init.normal_(w2)
# tensors can also be (re)initialised directly with in-place random methods
w2.data.normal_()

# example for some module
def weights_init(m):
    """DCGAN-style initialiser; apply to a model via `model.apply(weights_init)`.

    Conv* layers get N(0, 0.02) weights; BatchNorm* layers get N(1, 0.02)
    weights and a zero bias. Any other module type is left untouched.
    """
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

# for loop approach with direct access
class MyModel(nn.Module):
    """Module that re-initialises its submodules in __init__:
    He init for convs, ones/zeros for batch-norm, zero bias for linears."""

    def __init__(self):
        # FIX: the scraped snippet skipped Module.__init__ — it is required
        # before any module machinery (like .modules()) can be used
        super().__init__()
        import math  # local import: the file's import header is not in this chunk
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # He initialisation: std = sqrt(2 / fan_out)
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

import torch

# If there’s a single input to an operation that requires gradient,
# its output will also require gradient.
# NOTE(review): `Variable` (and `z` on the line below) are undefined in
# this fragment — the scrape lost their definitions; `Variable` is the
# pre-0.4 torch.autograd wrapper, now merged into plain tensors.
x = Variable(torch.randn(5, 5))
y = Variable(torch.randn(5, 5))
a = x + y
b = a + z

# Volatile differs from requires_grad in how the flag propagates.
# If there’s even a single volatile input to an operation,
# its output is also going to be volatile.
# NOTE(review): `volatile` was removed in PyTorch 0.4; the modern
# equivalent is wrapping inference code in `with torch.no_grad():`.
y = Variable(torch.randn(5, 5), volatile=True)
a = x + y

# scheduler example: decay the learning rate by 10x every 30 epochs
from torch.optim import lr_scheduler

# NOTE(review): `model`, `train` and `validate` are undefined in this
# fragment — their definitions were lost when the article was scraped.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

# NOTE(review): the loop-body indentation was lost by the scrape; also,
# since PyTorch 1.1 `scheduler.step()` should be called *after*
# `optimizer.step()` (i.e. after the epoch's training), not before.
for epoch in range(100):
scheduler.step()
train()
validate()

# Train flag can be updated with a boolean
# to toggle dropout and batch-norm learning behaviour
# NOTE(review): `model` is undefined in this fragment (definition lost by
# the scrape). `model.train(False)` is equivalent to `model.eval()`.
model.train(True)
# execute train step
model.train(False)
# run inference step

# Reproducibility: fix the CPU random number generator's state...
torch.manual_seed(42)
# ...and the generators of every visible GPU (harmless without CUDA)
torch.cuda.manual_seed_all(42)

from collections import OrderedDict

import torch.nn as nn

# Sequential accepts an OrderedDict so every submodule gets a readable name,
# which shows up in repr() and in state_dict keys.
named_layers = OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU()),
])
model = nn.Sequential(named_layers)

print(model)

# Sequential (
#   (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
#   (relu1): ReLU ()
#   (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
#   (relu2): ReLU ()
# )

# save/load only the model parameters (preferred solution)
# NOTE(review): `model` and `save_path` are undefined in this fragment.
# The matching load is `model.load_state_dict(torch.load(save_path))`.
torch.save(model.state_dict(), save_path)

# save the whole model object (pickles the class too — more fragile
# across code changes)
torch.save(model, save_path)

• logger：https://github.com/oval-group/logger
• Crayon：https://github.com/torrvision/crayon
• tensorboard_logger：https://github.com/TeamHG-Memex/tensorboard_logger
• tensorboard-pytorch：https://github.com/lanpa/tensorboard-pytorch

PyTorch 开发者不希望重新发明轮子，他们只是想要借鉴多重处理。为了构建自己的数据加载器，你可以从 torch.utils.data.Dataset 继承类，并更改一些方法：

import torch
import torchvision as tv

class ImagesDataset(torch.utils.data.Dataset):
    """Dataset backed by a pandas DataFrame with 'path' and 'class_' columns.

    FIX: the scraped snippet truncated the __init__ signature and lost the
    image-loading line (presumably `img = Image.open(path)`). Both are
    reconstructed here; loading is injected via `loader` so the class has
    no hard PIL/torchvision dependency.
    """

    def __init__(self, df, transform=None, loader=None):
        self.df = df
        self.transform = transform
        # callers typically pass PIL's Image.open here
        self.loader = loader

    def __getitem__(self, index):
        row = self.df.iloc[index]

        target = row['class_']
        path = row['path']
        # without a loader, hand the raw path to the transform/caller
        img = self.loader(path) if self.loader is not None else path
        if self.transform is not None:
            img = self.transform(img)

        return img, target

    def __len__(self):
        # number of DataFrame rows
        n, _ = self.df.shape
        return n

# what transformations should be done with our images
# NOTE(review): the Compose-argument indentation was lost by the scrape.
data_transforms = tv.transforms.Compose([
tv.transforms.RandomHorizontalFlip(),
tv.transforms.ToTensor(),
])

# initialize our dataset at first
# NOTE(review): `train_df` is undefined in this fragment.
train_dataset = ImagesDataset(
df=train_df,
transform=data_transforms
)

# initialize data loader with required number of workers and other params
# NOTE(review): the scrape dropped the call head here — presumably
# `train_loader = torch.utils.data.DataLoader(train_dataset,`
batch_size=10,
shuffle=True,
num_workers=16)

# fetch the batch (calls the `__getitem__` method)
# NOTE(review): the loop header, e.g. `for images, labels in train_loader:`,
# was also lost by the scrape.
pass

1. PyTorch 的图像维度和 TensorFlow 的不同：前者采用 [Batch_size × channels × height × width] 的形式。不过只要在预处理中使用 torchvision.transforms.ToTensor()，这一转换就会自动完成。transforms 包中还有很多有用的小工具。

2. 你很可能会在 GPU 上使用固定（pinned）内存。对此，只需在调用 cuda() 时额外传入 async=True 标志（新版 PyTorch 中该参数已改名为 non_blocking=True），并从设置了 pin_memory=True 的 DataLoader 中获取固定内存里的批次数据。

# NOTE(review): skeleton of a full train/validate loop. All indentation was
# lost when the article was scraped; `path_to_images`, `epochs`, `to_gpu`,
# `inputs`, `labels`, `Variable` and `save_best_model` are undefined here,
# and the inner dataloader loop is missing.
class ImagesDataset(torch.utils.data.Dataset):
pass

class Net(nn.Module):
pass

model = Net()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
criterion = torch.nn.MSELoss()

dataset = ImagesDataset(path_to_images)

train = True
for epoch in range(epochs):
if train:
# NOTE(review): bug — this calls the *module* `lr_scheduler`; it should be
# `scheduler.step()`, and since PyTorch 1.1 it belongs after optimizer.step().
lr_scheduler.step()

inputs = Variable(to_gpu(inputs))
labels = Variable(to_gpu(labels))

outputs = model(inputs)
loss = criterion(outputs, labels)
if train:
# NOTE(review): `optimizer.zero_grad()` is missing before backward();
# without it gradients accumulate across iterations.
loss.backward()
optimizer.step()

if not train:
save_best_model(epoch_validation_accuracy)

• 它可以用来代替 Numpy
• 它的原型设计非常快
• 调试和使用条件流非常简单
• 有很多方便且开箱即用的工具

PyTorch 是一个正在快速发展的框架，背靠一个富有活力的社区。现在是尝试 PyTorch 的好时机。

0 条评论

• ### 快速上手笔记，PyTorch模型训练实用教程（附代码）

自 2017 年 1 月 PyTorch 推出以来，其热度持续上升，一度有赶超 TensorFlow 的趋势。PyTorch 能在短时间内被众多研究人员和工程师...

• ### 只知道TF和PyTorch还不够，快来看看怎么从PyTorch转向自动微分神器JAX

Jax 是谷歌开发的一个 Python 库，用于机器学习和数学计算。一经推出，Jax 便将其定义为一个 Python+NumPy 的程序包。它有着可以进行微分、...

• ### 四天速成！中国香港科技大学 PyTorch 课件分享

机器之心整理 参与：黄小天、蒋思源 前天，香港科技大学计算机系教授 Sung Kim 在 Google Drive 分享了一个 3 天速成的 TensorFlo...

• ### 干货 | PyTorch相比TensorFlow，存在哪些自身优势？

1、 PyTorch 可替代NumPy 使用：PyTorch 本身主要构件是张量——和 NumPy 看起来差不多。使得 PyTorch 可支持大量相同的 API...

• ### PyTorch : torch.nn.xxx 和 torch.nn.functional.xxx

在写 PyTorch 代码时，我们会发现在 torch.nn.xxx 和 torch.nn.functional.xxx 中有一些功能重复的操作，比如卷积、激活...

• ### PyTorch5:torch.nn总览&torch.nn.Module

PyTorch 把与深度学习模型搭建相关的全部类全部在 torch.nn 这个子模块中。

• ### pytorch进行CIFAR-10分类（2）定义卷积神经网络

官网tutorial中显示图片的那部分我就直接省略了，因为跟训练网络无关，只是for fun

• ### PyTorch中模型的可复现性

在PyTorch发行版中，不同的版本或不同的平台上，不能保证完全可重复的结果。此外，即使在使用相同种子的情况下，结果也不能保证在CPU和GPU上再现。

• ### 指令集架构(ISA)之IBM Power ISA开源应对​RISC-V生态(13k字)

科学Sciences导读：指令集架构(Instruction-SetArchitecture, ISA)之IBM Power ISA开源应对RISC-V生态。本...