import torch
import torch.nn as nn

ndf = 128
z_size = 512
# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(6,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(16,64,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.decoder = nn.Sequential(       
            nn.ConvTranspose2d(64,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),        
            nn.ConvTranspose2d(16,6,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(6,3,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.fc1 = nn.Linear(ndf*4*8*16,z_size)
        self.fc2 = nn.Linear(ndf*4*8*16,z_size)
        self.d1 = nn.Linear(z_size, ndf*4*8*8)
        self.z_size = z_size
        self.d_max = ndf *4
    def encode(self, x):
        x = self.encoder(x) 
        x = x.view(x.shape[0], -1)  
        mu = self.fc1(x)
        log_var = self.fc2(x)
        return mu, log_var
    def decode(self,x):
        x = x.view(x.shape[0], self.z_size)
        x = self.d1(x)
        x = x.view(x.shape[0], self.d_max, 8,8)
        x = self.decoder(x)
        return x
    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)
    def forward(self, x):
        mu, log_var = self.encode(x)
        mu = mu.squeeze()
        log_var = log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var

I adapted this code from a tutorial. When I try to run my autoencoder, I get the error "Given transposed=1, weight of size [64, 16, 3, 3], expected input[16, 512, 8, 8] to have 64 channels, but got 512 channels instead".
Can someone explain how I should further adapt this code for the CIFAR10 dataset with a batch size of 16?
Posted on 2020-03-04 13:53:46
Looking through your code, the input and output dimensions are incorrect.
Assuming the input and output arrays both have shape 32x32x3, I reformatted the code:
ndf = 128
z_size = 512
# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(6,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(16,64,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.decoder = nn.Sequential(       
            nn.ConvTranspose2d(64,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),        
            nn.ConvTranspose2d(16,6,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(6,3,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.fc1 = nn.Linear(ndf*4*8*16,z_size)
        self.fc2 = nn.Linear(ndf*4*8*16,z_size)
        self.d1 = nn.Linear(z_size, ndf*4*8*16)
        self.z_size = z_size
        self.d_max = ndf *4
    def encode(self, x):
        x = self.encoder(x) 
        x = x.view(x.shape[0], -1)  
        mu = self.fc1(x)
        log_var = self.fc2(x)
        return mu, log_var
    def decode(self,x):
        x = x.view(x.shape[0], self.z_size)
        x = self.d1(x)
        x = x.view(x.shape[0], 64, 32, 32)
        x = self.decoder(x)
        return x
    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)
    def forward(self, x):
        mu, log_var = self.encode(x)
        mu = mu.squeeze()
        log_var = log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var

Hope this code works :)
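To address the CIFAR10 / batch-size-16 part of the question, here is a minimal usage sketch (my own addition, not part of the original answer). It loads CIFAR10 through torchvision with batch_size=16 and runs one training step with the usual VAE objective (reconstruction loss plus KL divergence); the optimizer, learning rate, and reconstruction-loss choice are illustrative assumptions, not something specified in the thread.

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# CIFAR10 images arrive as 3x32x32 tensors in [0, 1]
transform = transforms.ToTensor()
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=True)

model = MyNetwork()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # assumed hyperparameters

for images, _ in trainloader:           # images: [16, 3, 32, 32]
    recon, mu, log_var = model(images)  # recon:  [16, 3, 32, 32]
    # Usual VAE loss: reconstruction term + KL divergence of q(z|x) from N(0, I)
    recon_loss = nn.functional.mse_loss(recon, images, reduction='sum')
    kl_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
    loss = recon_loss + kl_loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    break  # a single batch is enough to verify that the shapes line up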
Posted on 2020-03-04 14:00:20
The input to the decoder (x = self.decoder(x) in the decode method) must have 64 channels, as defined by nn.ConvTranspose2d(64, 16, kernel_size=3, stride=1, padding=1).
To achieve this, make the following changes:
Change self.d1 = nn.Linear(z_size, ndf*4*8*8) to self.d1 = nn.Linear(z_size, ndf*4*8*16), and change x = x.view(x.shape[0], self.d_max, 8, 8) to x = x.view(x.shape[0], 64, 32, 32). Since ndf*4*8*16 = 128*4*8*16 = 65536 = 64*32*32, the output of the linear layer now reshapes exactly into a [batch, 64, 32, 32] tensor, matching the 64 input channels the decoder expects.
I analyzed the shape of the input tensor at each layer with print statements:
ndf = 128
z_size = 512
# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(6,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(16,64,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.decoder = nn.Sequential(       
            nn.ConvTranspose2d(64,16,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),        
            nn.ConvTranspose2d(16,6,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(6,3,kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.fc1 = nn.Linear(ndf*4*8*16,z_size)
        self.fc2 = nn.Linear(ndf*4*8*16,z_size)
        self.d1 = nn.Linear(z_size, ndf*4*8*16)
        self.z_size = z_size
        self.d_max = ndf *4
    def encode(self, x):
        print('encoder')
        print(x.shape)
        x = self.encoder(x)
        print(x.shape) 
        x = x.view(x.shape[0], -1)  
        print(x.shape)
        mu = self.fc1(x)
        print(mu.shape)
        log_var = self.fc2(x)
        print(log_var.shape)
        return mu, log_var
    def decode(self,x):
        print('decoder')
        print(x.shape)
        x = x.view(x.shape[0], self.z_size)
        print(x.shape)
        x = self.d1(x)
        print(x.shape)
        x = x.view(x.shape[0], 64, 32, 32)
        print(x.shape)
        x = self.decoder(x)
        print(x.shape)
        return x
    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)
    def forward(self, x):
        mu, log_var = self.encode(x)
        mu = mu.squeeze()
        log_var = log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var
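For reference (my own worked expectation from the layer definitions, not captured output from the thread), running the model above on a CIFAR10 batch of 16 images should make the print statements trace the following shapes:

encoder
torch.Size([16, 3, 32, 32])
torch.Size([16, 64, 32, 32])
torch.Size([16, 65536])
torch.Size([16, 512])
torch.Size([16, 512])
decoder
torch.Size([16, 512, 1, 1])
torch.Size([16, 512])
torch.Size([16, 65536])
torch.Size([16, 64, 32, 32])
torch.Size([16, 3, 32, 32])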
https://stackoverflow.com/questions/60514292