I'm trying to train a network in which each layer's learning rate is 1/(layer width). Is there a way to do this in PyTorch? I tried changing the learning rate in the optimizer and updating it inside my training loop, but that didn't work. I've seen people discuss this for Adam, but I'm training with SGD. Here is the part where I define my model and the training, in case that helps.
class ConvNet2(nn.Module):
    def __init__(self):
        super(ConvNet2, self).__init__()
        self.network = nn.Sequential(
            nn.Conv2d(3, 8, 3),
            nn.ReLU(),
            nn.Conv2d(8, 32, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(800, 10)
        )

    def forward(self, x):
        return self.network(x)

net2 = ConvNet2().to(device)
def train(network, number_of_epochs):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(network.parameters(), lr=learning_rate)
    for epoch in range(number_of_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            # get the inputs
            inputs = inputs.to(device)
            labels = labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize (the original code ran the
            # forward pass twice; once is enough)
            outputs = network(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
Posted on 2022-05-01 18:59:51
In the documentation you can see that you can specify "per-parameter options". Suppose you only want to specify a learning rate for the Conv2d layers (this is easy to customize in the code below); then you can do the following:
import torch
from torch import nn
from torch import optim
from pprint import pprint

class ConvNet2(nn.Module):
    def __init__(self):
        super(ConvNet2, self).__init__()
        self.network = nn.Sequential(
            nn.Conv2d(3, 8, 3),
            nn.ReLU(),
            nn.Conv2d(8, 32, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(800, 10)
        )

    def forward(self, x):
        return self.network(x)

net2 = ConvNet2()

def getParameters(model):
    getWidthConv2D = lambda layer: layer.out_channels
    parameters = []
    for layer in model.children():
        paramdict = {'params': layer.parameters()}
        if isinstance(layer, nn.Conv2d):
            # Specify the learning rate for Conv2d here,
            # e.g. 1 / (layer width) as asked in the question
            paramdict['lr'] = 1.0 / getWidthConv2D(layer)
        parameters.append(paramdict)
    return parameters

optimizer = optim.SGD(getParameters(net2.network), lr=0.05)
print(optimizer)
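Since the question asks for lr = 1/(layer width) on every layer, the same idea extends to the Linear layer too. Below is a minimal sketch (not from the original answer), assuming "width" means out_channels for Conv2d and out_features for Linear; width_scaled_parameters is a hypothetical helper name:

import torch
from torch import nn, optim

# Hypothetical helper: per-parameter groups with lr = 1 / width for
# Conv2d and Linear layers; parameter-free layers are skipped.
def width_scaled_parameters(model):
    groups = []
    for layer in model.children():
        params = list(layer.parameters())
        if not params:  # ReLU, MaxPool2d, Flatten have no weights
            continue
        group = {'params': params}
        if isinstance(layer, nn.Conv2d):
            group['lr'] = 1.0 / layer.out_channels   # width = out_channels (assumption)
        elif isinstance(layer, nn.Linear):
            group['lr'] = 1.0 / layer.out_features   # width = out_features (assumption)
        groups.append(group)
    return groups

optimizer = optim.SGD(width_scaled_parameters(net2.network), lr=0.05)
for group in optimizer.param_groups:
    print(group['lr'])  # each group carries its own learning rate

Any group without an explicit 'lr' key falls back to the default lr passed to the SGD constructor.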
Posted on 2022-05-01 18:36:22
You can do this by passing the relevant parameters together with their learning rates:
optimizer = optim.SGD(
    [
        {"params": network.layer[0].parameters(), "lr": 1e-1},
        {"params": network.layer[1].parameters(), "lr": 1e-2},
        ...
    ],
    lr=1e-3,
)
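Applied to the ConvNet2 from the question, where the Sequential is stored in the attribute network, the indexing would look like the sketch below; the indices and widths simply mirror the layer definitions above, with lr = 1/(layer width) as an assumption about what the asker wants:

# Indices into net2.network: 0, 2, 5, 7 are Conv2d layers, 11 is the Linear layer.
optimizer = optim.SGD(
    [
        {"params": net2.network[0].parameters(), "lr": 1 / 8},    # Conv2d(3, 8, 3)
        {"params": net2.network[2].parameters(), "lr": 1 / 32},   # Conv2d(8, 32, 3)
        {"params": net2.network[5].parameters(), "lr": 1 / 32},   # Conv2d(32, 32, 3)
        {"params": net2.network[7].parameters(), "lr": 1 / 32},   # Conv2d(32, 32, 3)
        {"params": net2.network[11].parameters(), "lr": 1 / 10},  # Linear(800, 10)
    ],
    lr=1e-3,  # default for anything not listed above
)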
https://stackoverflow.com/questions/72079140