
Got 512 channels instead of 64 - what should I change in my autoencoder?

Stack Overflow user
Asked on 2020-03-04 03:32:57
2 answers · 147 views · 0 followers · 0 votes
import torch
import torch.nn as nn

ndf = 128
z_size = 512

# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(6,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(16,64,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))

        self.decoder = nn.Sequential(       
            nn.ConvTranspose2d(64,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),        
            nn.ConvTranspose2d(16,6,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(6,3,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))

        self.fc1 = nn.Linear(ndf*4*8*16,z_size)
        self.fc2 = nn.Linear(ndf*4*8*16,z_size)
        self.d1 = nn.Linear(z_size, ndf*4*8*8)
        self.z_size = z_size
        self.d_max = ndf *4

    def encode(self, x):
        x = self.encoder(x) 
        x = x.view(x.shape[0], -1)  
        mu = self.fc1(x)
        log_var = self.fc2(x)
        return mu, log_var

    def decode(self,x):
        x = x.view(x.shape[0], self.z_size)
        x = self.d1(x)
        x = x.view(x.shape[0], self.d_max, 8,8)
        x = self.decoder(x)
        return x

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)

    def forward(self, x):
        mu, log_var = self.encode(x)
        mu = mu.squeeze()
        log_var = log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var

I adapted this code from a tutorial, and when I try to run my autoencoder I get the error "Given transposed=1, weight of size 64 16 3 3, expected input[16, 512, 8, 8] to have 64 channels, but got 512 channels instead".

Could someone explain how I should further adapt this code for the CIFAR10 dataset with a batch size of 16?
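
For reference, a minimal sketch of how such a model would be fed CIFAR10 batches of 16, assuming the standard torchvision loader (the transform and root path here are illustrative):

import torch
import torchvision
import torchvision.transforms as transforms

# Illustrative CIFAR10 loader: each batch has shape (16, 3, 32, 32)
transform = transforms.ToTensor()
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=True)

model = MyNetwork()
images, _ = next(iter(trainloader))
recon, mu, log_var = model(images)  # with the code above, this call raises the channel-mismatch error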


2 Answers

Stack Overflow user

Answered on 2020-03-04 13:53:46

Looking through your code, the input and output dimensions do not match up.

Assuming both the input and output arrays have shape 32x32x3, I have reworked the code as follows.

ndf = 128
z_size = 512

# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(6,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(16,64,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))

        self.decoder = nn.Sequential(       
            nn.ConvTranspose2d(64,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),        
            nn.ConvTranspose2d(16,6,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(6,3,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))

        self.fc1 = nn.Linear(ndf*4*8*16,z_size)
        self.fc2 = nn.Linear(ndf*4*8*16,z_size)
        self.d1 = nn.Linear(z_size, ndf*4*8*16)  # changed: ndf*4*8*16 = 64*32*32, matching the flattened encoder output
        self.z_size = z_size
        self.d_max = ndf *4

    def encode(self, x):
        x = self.encoder(x) 
        x = x.view(x.shape[0], -1)  
        mu = self.fc1(x)
        log_var = self.fc2(x)
        return mu, log_var

    def decode(self,x):
        x = x.view(x.shape[0], self.z_size)
        x = self.d1(x)
        x = x.view(x.shape[0], 64, 32, 32)  # changed: reshape to the 64-channel 32x32 feature map the decoder expects
        x = self.decoder(x)
        return x

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)

    def forward(self, x):
        mu, log_var = self.encode(x)
        mu = mu.squeeze()
        log_var = log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var

Hope this code works :)
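
As a quick check (not part of the original answer), the revised model can be run on a dummy CIFAR10-sized batch; assuming a batch size of 16, the shapes noted in the comments are what the layer definitions imply:

import torch

model = MyNetwork()
x = torch.randn(16, 3, 32, 32)    # a dummy CIFAR10 batch of 16 images
recon, mu, log_var = model(x)
print(recon.shape)                # expected: torch.Size([16, 3, 32, 32])
print(mu.shape, log_var.shape)    # expected: torch.Size([16, 512]) each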

Votes: 0

Stack Overflow user

Answered on 2020-03-04 14:00:20

The input to the decoder (the x = self.decoder(x) call in the forward pass) must have 64 channels, as defined by nn.ConvTranspose2d(64, 16, kernel_size=3, stride=1, padding=1).

To achieve this, you can make the following changes:

  • Change self.d1 = nn.Linear(z_size, ndf*4*8*8) to self.d1 = nn.Linear(z_size, ndf*4*8*16).
  • In the decode method, change x = x.view(x.shape[0], self.d_max, 8, 8) to x = x.view(x.shape[0], 64, 32, 32).

Use print statements to inspect the shape of the input tensor at each layer:

ndf = 128
z_size = 512

# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(6,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(16,64,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))

        self.decoder = nn.Sequential(       
            nn.ConvTranspose2d(64,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),        
            nn.ConvTranspose2d(16,6,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(6,3,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))

        self.fc1 = nn.Linear(ndf*4*8*16,z_size)
        self.fc2 = nn.Linear(ndf*4*8*16,z_size)
        self.d1 = nn.Linear(z_size, ndf*4*8*16)
        self.z_size = z_size
        self.d_max = ndf *4

    def encode(self, x):
        print('encoder')
        print(x.shape)
        x = self.encoder(x)
        print(x.shape) 
        x = x.view(x.shape[0], -1)  
        print(x.shape)
        mu = self.fc1(x)
        print(mu.shape)
        log_var = self.fc2(x)
        print(log_var.shape)
        return mu, log_var

    def decode(self,x):
        print('decoder')
        print(x.shape)
        x = x.view(x.shape[0], self.z_size)
        print(x.shape)
        x = self.d1(x)
        print(x.shape)
        x = x.view(x.shape[0], 64, 32, 32)
        print(x.shape)
        x = self.decoder(x)
        print(x.shape)
        return x

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)

    def forward(self, x):
        mu, log_var = self.encode(x)
        mu = mu.squeeze()
        log_var = log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var
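
For reference, assuming the same CIFAR10 setup (a batch of 16 images, i.e. input of shape (16, 3, 32, 32)), the print statements above should produce a trace along these lines (worked out by hand from the layer definitions, not a captured log):

# encoder
# torch.Size([16, 3, 32, 32])
# torch.Size([16, 64, 32, 32])
# torch.Size([16, 65536])
# torch.Size([16, 512])
# torch.Size([16, 512])
# decoder
# torch.Size([16, 512, 1, 1])
# torch.Size([16, 512])
# torch.Size([16, 65536])
# torch.Size([16, 64, 32, 32])
# torch.Size([16, 3, 32, 32])
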
Votes: 0
The original content of this page is provided by Stack Overflow.
Original link: https://stackoverflow.com/questions/60514292
