以学习一条直线 y=ax+b 为例演示 PyTorch 深度学习流程。末尾(第6节)有完整代码。笔记写得比较简单,详细的 Notebook 见《24小时入门PyTorch深度学习》。
torch中的重要模块:
# 2. Create the model
# Subclass nn.Module to define a custom model.
class LinearRegressionModelV2(nn.Module):
    """Linear regression implemented with a single nn.Linear layer."""

    def __init__(self):
        super().__init__()
        # nn.Linear creates the weight and bias parameters for us:
        # one input feature mapped to one output feature.
        self.linear_layer = nn.Linear(in_features=1, out_features=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute the forward pass (the model's computation)."""
        return self.linear_layer(x)
torch.nn 有许多内置损失函数,例如:loss_fn = nn.L1Loss() # MAE loss is same as L1Loss
torch.optim 有很多优化器,如 torch.optim.SGD()、torch.optim.Adam()。
# Canonical PyTorch training step (the five numbered calls below) followed by
# an evaluation step. NOTE(review): `model_0`, `model`, `loss_fn`, and the
# data tensors are assumed to be defined earlier — this fragment from the
# notes only illustrates the pattern; the runnable version is in section 6.
optimizer = torch.optim.SGD(params=model_0.parameters(), # parameters of target model to optimize
lr=0.01) # learning rate
y_pred = model(x_train)          # 1. forward pass on the training data
loss = loss_fn(y_pred, y_train)  # 2. compute the loss
optimizer.zero_grad()            # 3. clear gradients accumulated by the previous step
loss.backward()                  # 4. backpropagate to compute new gradients
optimizer.step()                 # 5. update the parameters using the gradients
y_pred = model(x_test)           # evaluation: forward pass on held-out data
loss = loss_fn(y_pred, y_test)   # evaluation loss (no optimizer step)
推理时不需要模型计算梯度,通过下面两步实现推理:
1. 设置模型为评估模式:model.eval()
2. 使用推理上下文管理器:with torch.inference_mode():
注意:PyTorch 1.10 之后才有推理模式,之前的版本可以用 with torch.no_grad()。
预测时,所有对象(数据和模型)应该在同一个设备上。
推理示例:
# Inference example: switch to eval mode, then predict inside
# torch.inference_mode() so no gradients are tracked.
model_0.eval()
with torch.inference_mode():
y_preds = model_0(X_test)  # NOTE(review): this line must be indented under the `with` block
通过 torch.save(状态字典, 保存路径) 保存模型的状态字典(state_dict())。state_dict 保存了模型的参数。
用 load_state_dict 加载之前保存的状态字典:
model_0.load_state_dict(torch.load(f=MODEL_SAVE_PATH))
将上面5个步骤组合起来,就得到一个完整的学习流程。
# Learn the parameters w and b of the line y = w*x + b with a linear model.
import torch
from torch import nn  # fix: `nn` is used below (nn.Module, nn.L1Loss) but was never imported

# Run on GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 1. Prepare data: y = weight * X + bias, sampled on [0, 1) with step 0.02.
weight, bias = 0.7, 0.3
# Create X and y (features and labels)
start, end, step = 0, 1, 0.02
# unsqueeze -> shape (50, 1): one feature per sample, as nn.Linear expects
X = torch.arange(start, end, step).unsqueeze(dim=1)
y = weight * X + bias

# Split data: first 80% for training, last 20% for testing (no shuffling).
train_split = int(0.8 * len(X))
X_train, y_train = X[:train_split], y[:train_split]
X_test, y_test = X[train_split:], y[train_split:]
len(X_train), len(y_train), len(X_test), len(y_test)
# 2. Create the model by subclassing nn.Module.
class LinearRegressionModelV2(nn.Module):
    """y = w*x + b modeled as a one-in/one-out linear layer."""

    def __init__(self):
        super().__init__()
        # Use nn.Linear() to create the learnable weight and bias parameters.
        self.linear_layer = nn.Linear(in_features=1, out_features=1)

    # forward defines the computation performed on the input x.
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear_layer(x)
# Fix the RNG seed so the random weight initialization is reproducible.
torch.manual_seed(42)  # set random seed
model_1 = LinearRegressionModelV2()
model_1.to(device)  # move model parameters to the target device
# You can print model_1 and model_1.state_dict() to inspect parameters.
# 3. Training setup
# Loss function: L1 loss (mean absolute error)
loss_fn = nn.L1Loss()
# Optimizer: plain SGD over the model's parameters
optimizer = torch.optim.SGD(params=model_1.parameters(),
lr=0.01)
# Number of training epochs (full passes over the training data)
epochs = 1000
# Move the data to the same device as the model — required before the
# forward pass, since all tensors in an op must share one device.
X_train = X_train.to(device)
X_test = X_test.to(device)
y_train = y_train.to(device)
y_test = y_test.to(device)
# Run the optimization loop for `epochs` iterations.
for epoch in range(epochs):
    ### Training
    model_1.train()  # put the model into training mode
    y_pred = model_1(X_train)        # 1. Forward pass
    loss = loss_fn(y_pred, y_train)  # 2. Calculate loss
    optimizer.zero_grad()            # 3. Zero grad optimizer
    loss.backward()                  # 4. Loss backward
    optimizer.step()                 # 5. Step the optimizer

    ### Testing
    model_1.eval()  # evaluation mode; combined with inference_mode, no gradients are tracked
    with torch.inference_mode():
        test_pred = model_1(X_test)
        test_loss = loss_fn(test_pred, y_test)

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Train loss: {loss} | Test loss: {test_loss}")
# 4. Inference
# Inspect the learned parameters and compare with the true values.
from pprint import pprint # pprint = pretty print, see: https://docs.python.org/3/library/pprint.html
print("The model learned the following values for weights and bias:")
pprint(model_1.state_dict())
print("\nAnd the original values for weights and bias are:")
print(f"weights: {weight}, bias: {bias}")

# Predict on the test data in eval mode with gradient tracking disabled.
model_1.eval()
with torch.inference_mode():
    y_preds = model_1(X_test)
print(f"y_preds={y_preds}")
# 5. Save the model parameters (state_dict) to disk.
from pathlib import Path

# Create the models directory (no-op if it already exists).
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Build the full save path.
MODEL_NAME = "01_pytorch_workflow_model_1.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

# Persist only the parameter dictionary, not the whole module object.
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(obj=model_1.state_dict(), f=MODEL_SAVE_PATH)
# Reload the saved state_dict into a fresh model instance and verify that it
# reproduces the trained model's predictions.
loaded_model_1 = LinearRegressionModelV2()
loaded_model_1.load_state_dict(torch.load(MODEL_SAVE_PATH))
loaded_model_1.to(device)
print(f"Loaded model:\n{loaded_model_1}")
print(f"Model on device:\n{next(loaded_model_1.parameters()).device}")

# Evaluate the reloaded model and compare against the original predictions.
loaded_model_1.eval()
with torch.inference_mode():
    loaded_model_1_preds = loaded_model_1(X_test)
print(y_preds == loaded_model_1_preds)