
UNet loss is NaN + UserWarning: converting a masked element to nan

Stack Overflow user
Asked on 2021-07-20 14:30:25
1 answer · 59 views · 0 followers · 0 votes

I am training a UNet whose class looks like this:

Code language: python
import torch
import torch.nn as nn

class UNet(nn.Module):
    def __init__(self):
        super().__init__()

        # encoder (downsampling)
        # Each enc_conv/dec_conv block should look like this:
        # nn.Sequential(
        #     nn.Conv2d(...),
        #     ... (2 or 3 conv layers with relu and batchnorm),
        # )
        self.enc_conv0 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.pool0 = nn.MaxPool2d(kernel_size=2, stride=2, return_indices=False)  # 256 -> 128
        self.enc_conv1 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, return_indices=False)  # 128 -> 64
        self.enc_conv2 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 64 -> 32
        self.enc_conv3 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU()
        )
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 32 -> 16

        # bottleneck
        self.bottleneck_conv = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(512),
            nn.ReLU()
        )

        # decoder (upsampling)
        self.upsample0 = nn.UpsamplingBilinear2d(scale_factor=2)  # 16 -> 32
        self.dec_conv0 = nn.Sequential(
            nn.Conv2d(in_channels=512*2, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        self.upsample1 = nn.UpsamplingBilinear2d(scale_factor=2)  # 32 -> 64
        self.dec_conv1 = nn.Sequential(
            nn.Conv2d(in_channels=256*2, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        self.upsample2 = nn.UpsamplingBilinear2d(scale_factor=2)  # 64 -> 128
        self.dec_conv2 = nn.Sequential(
            nn.Conv2d(in_channels=128*2, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.upsample3 = nn.UpsamplingBilinear2d(scale_factor=2)  # 128 -> 256
        self.dec_conv3 = nn.Sequential(
            nn.Conv2d(in_channels=64*2, out_channels=1, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1),
            nn.ReLU(),
            nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1),
            nn.ReLU(),
            nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1)
        )

    def forward(self, x):
        # encoder
        e0 = self.enc_conv0(x)
        pool0 = self.pool0(e0)
        e1 = self.enc_conv1(pool0)
        pool1 = self.pool1(e1)
        e2 = self.enc_conv2(pool1)
        pool2 = self.pool2(e2)
        e3 = self.enc_conv3(pool2)
        pool3 = self.pool3(e3)

        # bottleneck
        b = self.bottleneck_conv(pool3)

        # decoder (skip connections concatenated along the channel dim)
        d0 = self.dec_conv0(torch.cat([self.upsample0(b), e3], 1))
        d1 = self.dec_conv1(torch.cat([self.upsample1(d0), e2], 1))
        d2 = self.dec_conv2(torch.cat([self.upsample2(d1), e1], 1))
        d3 = self.dec_conv3(torch.cat([self.upsample3(d2), e0], 1))  # no activation
        return d3
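
As a quick sanity check, one can run a random batch through the model to confirm the output shape and whether the network itself already produces NaNs (this sketch assumes 256x256 RGB inputs, as the "256 -> 128" pooling comments suggest):

Code language: python
# Minimal smoke test for the UNet above; the input size is an assumption
# based on the pooling comments in the class.
model = UNet()
x = torch.randn(2, 3, 256, 256)   # a batch of two random RGB images
with torch.no_grad():
    out = model(x)
print(out.shape)                  # expected: torch.Size([2, 1, 256, 256])
print(torch.isnan(out).any())     # True here would implicate the model itself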

The training function:

Code language: python
from time import time

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

# `device` and `iou_pytorch` are defined elsewhere in the notebook.
def train(model, opt, loss_fn, score_fn, epochs, data_tr, data_val):

    torch.cuda.empty_cache()

    losses_train = []
    losses_val = []
    scores_train = []
    scores_val = []

    for epoch in range(epochs):
        tic = time()
        print('* Epoch %d/%d' % (epoch+1, epochs))

        avg_loss = 0
        model.train()  # train mode
        for X_batch, Y_batch in data_tr:
            # move data to device
            X_batch = X_batch.to(device)
            Y_batch = Y_batch.to(device)

            # set parameter gradients to zero
            opt.zero_grad()

            # forward
            Y_pred = model(X_batch)
            loss = loss_fn(Y_pred, Y_batch)  # forward pass
            loss.backward()   # backward pass
            opt.step()        # update weights

            # accumulate the loss to show the user
            avg_loss += loss / len(data_tr)
        toc = time()
        print('loss: %f' % avg_loss)
        losses_train.append(avg_loss)

        avg_score_train = score_fn(model, iou_pytorch, data_tr)
        scores_train.append(avg_score_train)

        # show intermediate results
        model.eval()  # evaluation mode
        avg_loss_val = 0

        for X_val, Y_val in data_val:
            with torch.no_grad():
                # detach and move predictions to the CPU
                Y_hat = model(X_val.to(device)).detach().cpu()
                loss = loss_fn(Y_hat, Y_val)
                avg_loss_val += loss / len(data_val)

        toc = time()
        print('loss_val: %f' % avg_loss_val)
        losses_val.append(avg_loss_val)

        avg_score_val = score_fn(model, iou_pytorch, data_val)
        scores_val.append(avg_score_val)

        torch.cuda.empty_cache()

        # visualize intermediate results
        clear_output(wait=True)
        for k in range(5):
            plt.subplot(2, 6, k+1)
            plt.imshow(np.rollaxis(X_val[k].numpy(), 0, 3), cmap='gray')
            plt.title('Real')
            plt.axis('off')

            plt.subplot(2, 6, k+7)
            plt.imshow(Y_hat[k, 0], cmap='gray')
            plt.title('Output')
            plt.axis('off')
        plt.suptitle('%d / %d - loss: %f' % (epoch+1, epochs, avg_loss))
        plt.show()

    return (losses_train, losses_val, scores_train, scores_val)
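
One incidental point, unlikely to be the NaN source but worth noting: `avg_loss += loss / len(data_tr)` accumulates the full autograd tensor, which keeps each batch's computation graph alive for the whole epoch. Accumulating the detached Python float avoids that:

Code language: python
# Inside the training loop, prefer the detached float over the tensor:
avg_loss += loss.item() / len(data_tr)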

However, when I run this, both train_loss and val_loss come out as nan, and I also get the warning from the title. Moreover, when plotting the predicted and target images, no output image is shown. I have tried different loss functions, but the result is the same. Something is probably wrong with my class.

Could you help me? Thanks in advance.
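
For context, the "converting a masked element to nan" warning is what matplotlib typically emits when imshow receives NaN values, so the blank plots are consistent with Y_hat being all NaN. A minimal debugging sketch for narrowing down where the NaN first appears, assuming the model, loss_fn, device and data_tr defined above:

Code language: python
# Hedged debugging sketch; torch.autograd's anomaly mode reports the
# operation that produced a NaN in the backward pass.
torch.autograd.set_detect_anomaly(True)

for X_batch, Y_batch in data_tr:
    assert not torch.isnan(X_batch).any(), "NaN already in the inputs"
    assert not torch.isnan(Y_batch).any(), "NaN already in the targets"
    Y_pred = model(X_batch.to(device))
    if torch.isnan(Y_pred).any():
        print("NaN first appears in the forward pass")
        break
    loss = loss_fn(Y_pred, Y_batch.to(device))
    if torch.isnan(loss):
        print("NaN first appears in the loss itself")
        break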


Stack Overflow user

Accepted answer

Posted on 2021-07-20 17:20:13

I'm not sure this is your error, but your last convolution block (self.dec_conv3) looks odd. I would only reduce to 1 channel in the very last convolution, and I would not perform two convolutions with 1 input channel and 1 output channel. Likewise, ending with a batchnorm can only produce a normalized output, which may be far from what you actually want:

Code language: python
self.dec_conv3 = nn.Sequential(
    nn.Conv2d(in_channels=64*2, out_channels=32, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(32),
    nn.ReLU(),

    nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.Conv2d(in_channels=32, out_channels=1, kernel_size=3, stride=1, padding=1)
)
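
A possible follow-up to this fix (an assumption on my part, not stated in the answer): with the final layer now emitting raw logits, a numerically stable companion is nn.BCEWithLogitsLoss, which folds the sigmoid into the loss computation:

Code language: python
# Sketch, assuming binary masks with values in [0, 1] and the raw-logit
# output of the corrected dec_conv3 above.
criterion = nn.BCEWithLogitsLoss()
loss = criterion(Y_pred, Y_batch.float())  # Y_pred: (N, 1, H, W) logits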

It would be interesting to know whether your loss is NaN already on the first iteration or only after a few iterations. Perhaps you are using a loss function that divides by zero?
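
The question does not show which loss function was used, but a Dice- or IoU-style loss without an epsilon guard is a classic way to divide by zero on empty masks. A hedged sketch of the guarded form (hypothetical, since the actual loss_fn is not shown):

Code language: python
# Hypothetical Dice loss with an epsilon guard against empty masks.
def dice_loss(logits, targets, eps=1e-7):
    probs = torch.sigmoid(logits)                     # logits -> probabilities
    num = 2 * (probs * targets).sum(dim=(1, 2, 3))    # per-sample intersection
    den = probs.sum(dim=(1, 2, 3)) + targets.sum(dim=(1, 2, 3)) + eps
    return 1 - (num / den).mean()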

Votes: 1
Original content on this page provided by Stack Overflow.
Original link:

https://stackoverflow.com/questions/68450437
