# 前言

“This note will present an overview of how autograd works and records the operations. It’s not strictly necessary to understand all this, but we recommend getting familiar with it, as it will help you write more efficient, cleaner programs, and can aid you in debugging.”

# 正文

>>> x = torch.randn(5, 5)  # requires_grad=False by default
>>> y = torch.randn(5, 5)  # requires_grad=False by default
>>> z = torch.randn((5, 5), requires_grad=True)
>>> a = x + y
>>> a.requires_grad
False
>>> b = a + z
>>> b.requires_grad
True

## register_hook

In[2]: import torch
In[4]: y = x * 2
In[5]: z = torch.mean(y)
In[6]: z
Out[6]: tensor(3.)
In[7]: z.backward()
In[8]: x.grad
Out[8]: tensor([ 1.,  1.])
In[10]: z.grad   # 应该为1

`register_hook(hook)` 这个函数属于 `torch.Tensor` 类，在计算与这个 tensor 相关的梯度时就会执行。它的参数 hook 是一个函数，这个函数应该是以下的形式：`hook(grad) -> Tensor or None`

In[2]: import torch
In[4]: y = x * 2
Out[5]: True
In[6]: y.register_hook(print)
Out[6]: <torch.utils.hooks.RemovableHandle at 0x7f765e876f60>
In[7]: z = torch.mean(y)
In[8]: z.backward()
tensor([ 0.5000,  0.5000])

## register_backward_hook

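假设输入为 $x = [1, 2, 3, 4]$，然后经过两个运算：

$$f_1(x) = w^\top x + b, \qquad f_2(y) = \frac{y}{4}$$

我们设置 $w$ 的每个分量都为 8，偏置 $b$ 为 2，因此 $f_1(x) = 8 \times (1 + 2 + 3 + 4) + 2 = 82$，$f_2(f_1(x)) = 20.5$。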

（以下分析需要精力投入）

import torch
import torch.nn as nn

# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class MyMul(nn.Module):
    """Module that multiplies its input by 2."""

    def forward(self, input):
        """Return ``input * 2``."""
        out = input * 2
        return out

class MyMean(nn.Module):
    """Custom division module: divides its input by the constant 4."""

    def forward(self, input):
        """Return ``input / 4``."""
        out = input / 4
        return out

def tensor_hook(grad):
    """Tensor hook passed to ``input.register_hook(tensor_hook)``.

    Prints a marker line followed by the gradient it receives. It returns
    nothing, so the gradient is left unchanged.

    NOTE(review): the ``def`` line was missing from the listing; only the
    first ``print`` survived. The one-argument signature follows
    ``Tensor.register_hook``, and the ``grad:`` line is reconstructed from the
    sample output printed further down in the article. Confirm against the
    original post.
    """
    print('tensor hook')
    print('grad:', grad)

class MyNet(nn.Module):
    """Toy network: a 4-to-1 linear layer followed by a division by 4."""

    def __init__(self):
        super().__init__()
        self.f1 = nn.Linear(4, 1, bias=True)
        self.f2 = MyMean()
        self.weight_init()

    def forward(self, input):
        """Apply operation 1 (``f1``) and then operation 2 (``f2``)."""
        self.input = input
        output = self.f1(input)       # operation 1 first, then operation 2
        output = self.f2(output)
        return output

    def weight_init(self):
        """Fill the linear layer with fixed values so the gradients are easy to check."""
        self.f1.weight.data.fill_(8.0)    # set every weight of the Linear layer to 8
        self.f1.bias.data.fill_(2.0)      # set the bias of the Linear layer to 2

    def my_hook(self, module, grad_input, grad_output):
        """Module backward hook; prints a marker and leaves the gradients untouched.

        NOTE(review): the ``def`` line was missing from the listing; only the
        ``print`` survived. The signature is the one required of a hook
        registered with ``register_backward_hook`` (bound method, so ``self``
        comes first). Confirm the body against the original post.
        """
        print('doing my_hook')

if __name__ == '__main__':

    input = torch.tensor([1, 2, 3, 4], dtype=torch.float32, requires_grad=True).to(device)

    net = MyNet()
    net.to(device)

    # Both hooks must be registered before ``result = net(input)`` runs,
    # because the hooks are bound during the forward pass.
    # NOTE: ``register_backward_hook`` is deprecated in favour of
    # ``register_full_backward_hook``; it is kept here because the outputs
    # discussed in the article were produced with the legacy behaviour.
    net.register_backward_hook(net.my_hook)
    input.register_hook(tensor_hook)
    result = net(input)

    print('result =', result)

    result.backward()

    # Show every parameter together with the gradient accumulated by backward().
    for param in net.parameters():
        print('{}:grad->{}'.format(param, param.grad))

class MyNet(nn.Module):
    """Variant of MyNet whose forward pass uses plain tensor arithmetic instead of sub-modules."""

    def __init__(self):
        ...  # constructor body elided in the article

    def forward(self, input):
        """Return ``input * 2 / 4`` and remember the input on ``self.input``."""
        self.input = input
        output = input * 2
        output = output / 4
        return output

（上面这段话收回）

result = tensor([ 20.5000], device='cuda:0')
doing my_hook
tensor hook
grad: tensor([ 2., 2., 2., 2.], device='cuda:0')
Parameter containing:
tensor([[ 8., 8., 8., 8.]], device='cuda:0'):grad->tensor([[ 0.2500, 0.5000, 0.7500, 1.0000]], device='cuda:0')
Parameter containing:
tensor([ 2.], device='cuda:0'):grad->tensor([ 0.2500], device='cuda:0')

，拿

 output = self.f1(input)
output = self.f2(output)

result = tensor([ 22.], device='cuda:0')
doing my_hook
original grad: (tensor([ 1.], device='cuda:0'), tensor([ 1.], device='cuda:0'))
tensor hook
grad: tensor([ 2.,  2.,  2.,  2.], device='cuda:0')
Parameter containing:
tensor([[ 8.,  8.,  8.,  8.]], device='cuda:0'):grad->tensor([[ 0.2500,  0.5000,  0.7500,  1.0000]], device='cuda:0')
Parameter containing:
tensor([ 2.], device='cuda:0'):grad->tensor([ 1.], device='cuda:0')

 output = self.f2(input)
output = self.f1(output)/4

result = tensor([ 5.5000], device='cuda:0')
doing my_hook
tensor hook
grad: tensor([ 0.5000,  0.5000,  0.5000,  0.5000], device='cuda:0')
Parameter containing:
tensor([[ 8.,  8.,  8.,  8.]], device='cuda:0'):grad->tensor([[ 0.0625,  0.1250,  0.1875,  0.2500]], device='cuda:0')
Parameter containing:
tensor([ 2.], device='cuda:0'):grad->tensor([ 0.2500], device='cuda:0')

 output = self.f1(input)
output = self.f2(output)

 output = self.f2(input)
output = self.f1(output)

。也就对应着之前说的 (tensor([ 1.], device='cuda:0'), tensor([ 1.], device='cuda:0'))。

 output = self.f2(input)
output = self.f1(output)/4

# 后记

# An autoencoder network is defined here.
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        # _ConvLayer, Flatten, Reshape and _UpScale are defined elsewhere
        # (not shown in this excerpt).
        self.encoder = nn.Sequential(
            _ConvLayer(3, 128),
            _ConvLayer(128, 256),
            _ConvLayer(256, 512),
            _ConvLayer(512, 1024),
            Flatten(),
            nn.Linear(1024 * 4 * 4, 1024),
            nn.Linear(1024, 1024 * 4 * 4),
            Reshape(),
            _UpScale(1024, 512),
        )
        ...  # remainder of the constructor elided in the article

# Print the requires_grad flag of every parameter in the encoder.
for param in model.encoder.parameters():
    print(param.requires_grad)

True
True
True
True
True
True
True
True
True
True
True
True
True
True

