RuntimeError: mat1 and mat2 shapes cannot be multiplied (4000x20 and 200x441)

Posted on 2025-01-11 10:03:23


The architecture of the decoder of my variational autoencoder is given in the snippet below

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


class ConvolutionalVAE(nn.Module):
    
    def __init__(self, nchannel, base_channels, z_dim, hidden_dim,  device, img_width, batch_size):
        super(ConvolutionalVAE, self).__init__()

        self.nchannel   = nchannel
        self.base_channels = base_channels
        self.z_dim      = z_dim
        self.hidden_dim = hidden_dim
        self.device     = device
        self.img_width  = img_width
        self.batch_size = batch_size
        self.enc_kernel = 4
        self.enc_stride = 2
        self._to_linear = None
        ########################
        # ENCODER-CONVOLUTION LAYERS
        self.conv0       = nn.Conv2d(nchannel, base_channels, self.enc_kernel, stride=self.enc_stride)
        self.bn2d_0      = nn.BatchNorm2d(self.base_channels)
        self.LeakyReLU_0 = nn.LeakyReLU(0.2)
        out_width        = np.floor((self.img_width - self.enc_kernel) / self.enc_stride + 1)
        self.conv1       = nn.Conv2d(base_channels, base_channels*2, self.enc_kernel, stride=self.enc_stride)
        self.bn2d_1      = nn.BatchNorm2d(base_channels*2)
        self.LeakyReLU_1 = nn.LeakyReLU(0.2)
        out_width        = np.floor((out_width - self.enc_kernel) / self.enc_stride + 1)
        self.conv2       = nn.Conv2d(base_channels*2, base_channels*4, self.enc_kernel, stride=self.enc_stride)
        self.bn2d_2      = nn.BatchNorm2d(base_channels*4)
        self.LeakyReLU_2 = nn.LeakyReLU(0.2)
        out_width        = np.floor((out_width - self.enc_kernel) / self.enc_stride + 1)
        self.conv3       = nn.Conv2d(base_channels*4, base_channels*8, self.enc_kernel, stride=self.enc_stride)
        self.bn2d_3      = nn.BatchNorm2d(base_channels*8)
        self.LeakyReLU_3 = nn.LeakyReLU(0.2)
        out_width        = int(np.floor((out_width - self.enc_kernel) / self.enc_stride + 1))
        ########################
        #ENCODER-USING FULLY CONNECTED LAYERS
        #THE LATENT SPACE (Z)
        self.flatten     = nn.Flatten()
        self.fc0         = nn.Linear((out_width**2) * base_channels * 8, base_channels*8*4*4, bias=False)
        self.bn1d        = nn.BatchNorm1d(base_channels*8*4*4)
        self.fc1         = nn.Linear(base_channels*8*4*4, hidden_dim, bias=False)
        self.bn1d_1      = nn.BatchNorm1d(hidden_dim)
        # mean of z

        self.fc2         = nn.Linear(hidden_dim, z_dim, bias=False)
        self.bn1d_2      = nn.BatchNorm1d(z_dim)
        # variance of z

        self.fc3         = nn.Linear(hidden_dim, z_dim, bias=False)
        self.bn1d_3      = nn.BatchNorm1d(z_dim)
        ########################
        # DECODER: 
        #  P(X|Z)
        conv2d_transpose_kernels, conv2d_transpose_input_width = self.determine_decoder_params(self.z_dim, self.img_width)
        self.conv2d_transpose_input_width = conv2d_transpose_input_width
        self.px_z_fc_0   = nn.Linear(self.z_dim, conv2d_transpose_input_width ** 2)
        self.px_z_bn1d_0 = nn.BatchNorm1d(conv2d_transpose_input_width ** 2)
        self.px_z_fc_1   = nn.Linear(conv2d_transpose_input_width ** 2, conv2d_transpose_input_width ** 2)
        #self.unflatten = nn.Unflatten(1, (1, conv2d_transpose_input_width, conv2d_transpose_input_width))
        self.conv2d_transpose_input_width = conv2d_transpose_input_width
        self.px_z_conv_transpose2d = nn.ModuleList()
        self.px_z_bn2d   = nn.ModuleList()
        self.n_conv2d_transpose = len(conv2d_transpose_kernels)
        self.px_z_conv_transpose2d.append(nn.ConvTranspose2d(1, self.base_channels * (self.n_conv2d_transpose - 1),
                                                             kernel_size=conv2d_transpose_kernels[0], stride=2))
        self.px_z_bn2d.append(nn.BatchNorm2d(self.base_channels * (self.n_conv2d_transpose - 1)))
        self.px_z_LeakyReLU = nn.ModuleList()
        self.px_z_LeakyReLU.append(nn.LeakyReLU(0.2))
        
        for i in range(1, self.n_conv2d_transpose - 1):
            self.px_z_conv_transpose2d.append(nn.ConvTranspose2d(self.base_channels * (self.n_conv2d_transpose - i),
                                                                 self.base_channels*(self.n_conv2d_transpose - i - 1),
                                                                 kernel_size=conv2d_transpose_kernels[i], stride=2))
            self.px_z_bn2d.append(nn.BatchNorm2d(self.base_channels * (self.n_conv2d_transpose - i - 1)))
            self.px_z_LeakyReLU.append(nn.LeakyReLU(0.2))
        self.px_z_conv_transpose2d.append(nn.ConvTranspose2d(self.base_channels, self.nchannel,
                                                             kernel_size=conv2d_transpose_kernels[-1], stride=2))
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.to(device=self.device)


    def decoder(self, z_input):
        #Generate X: P(X|Z)
        h  = F.relu(self.px_z_bn1d_0(self.px_z_fc_0(z_input)))
        flattened_h = self.px_z_fc_1(h)
        h = flattened_h.view(flattened_h.size()[0], 1, self.conv2d_transpose_input_width, self.conv2d_transpose_input_width)
        for i in range(self.n_conv2d_transpose - 1):
            h = self.px_z_LeakyReLU[i](self.px_z_bn2d[i](self.px_z_conv_transpose2d[i](h)))
        x_recons_mean_flat = torch.sigmoid(self.px_z_conv_transpose2d[self.n_conv2d_transpose - 1](h))
        return x_recons_mean_flat

When I run my code to reconstruct the images:

all_z = []
for d in range(self.z_dim):
    temp_z = torch.cat([self.z_sample_list[k][:, d].unsqueeze(1) for k in range(self.K)], dim=1)
    print(f'size of each z component dimension: {temp_z.size()}')
    all_z.append(torch.mm(temp_z.transpose(1, 0), components).unsqueeze(1))
out = torch.cat(all_z, 1)
x_samples = self.decoder(out)

I got this error message:

size of z dimension: 200
size of each z component dimension: torch.Size([50, 20])
size of all z component dimension: torch.Size([20, 200, 20])
x_samples = self.decoder(out)
  File "VAE.py", line 241, in decoder
    h  = F.relu(self.px_z_bn1d_0(self.px_z_fc_0(z_input)))
  File "/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/anaconda3/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 96, in forward
    return F.linear(input, self.weight, self.bias)
  File "/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py", line 1847, in linear
    return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (4000x20 and 200x441)
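From the traceback, the 200x441 matrix is the (transposed) weight of px_z_fc_0, i.e. nn.Linear(self.z_dim, conv2d_transpose_input_width ** 2) with z_dim = 200 and, presumably, conv2d_transpose_input_width = 21 (21² = 441). That layer expects a 2-D batch of shape (batch_size, z_dim), while out is the 3-D (20, 200, 20) tensor printed above, which nn.Linear flattens to 4000x20. A minimal sketch reproducing the mismatch (stand-in sizes taken from the printout and traceback, not from the original script):

import torch
import torch.nn as nn

px_z_fc_0 = nn.Linear(200, 441)    # z_dim = 200, 21 ** 2 = 441 (inferred from the traceback)
out = torch.randn(20, 200, 20)     # same shape as the tensor built by the loop above
# px_z_fc_0(out)                   # RuntimeError: mat1 and mat2 shapes cannot be multiplied (4000x20 and 200x441)
z_input = torch.randn(50, 200)     # what the layer expects: (batch_size, z_dim)
print(px_z_fc_0(z_input).shape)    # torch.Size([50, 441])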

Update
I changed my code slightly to this

all_z = []
for d in range(self.z_dim):
    temp_z = torch.cat([self.z_sample_list[k][:, d].unsqueeze(1) for k in range(self.K)], dim=1)
    all_z.append(torch.mm(temp_z.transpose(1, 0), components).unsqueeze(1))
out = torch.cat(all_z, 1)
print(f'size of all z component dimension: {out.size()}')
out = F.pad(input=out, pad=(1, 0, 0, 0, 0, 1), mode='constant', value=0)
print(f'new size of all z component dimension after padding: {out.size()}')
out = rearrange(out, 'd0 d1 d2 -> d1 (d0 d2)')  # rearrange is from einops
x_samples = self.decoder(out)

Now the new error is

x_samples = self.decoder(out)
  File "VAE.py", line 243, in decoder
    h  = F.relu(self.px_z_bn1d_0(self.px_z_fc_0(z_input)))
  File "/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/anaconda3/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 96, in forward
    return F.linear(input, self.weight, self.bias)
  File "/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py", line 1847, in linear
    return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (200x441 and 200x441)
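The sizes in this message can also be reproduced in isolation (a small sketch reusing the hypothetical layer sizes above): after the padding and rearrange, out is (200, 441), so its last dimension (441) still does not match the layer's in_features (200).

import torch
import torch.nn.functional as F

weight = torch.randn(441, 200)    # px_z_fc_0.weight has shape (out_features, in_features)
out = torch.randn(200, 441)       # shape of out after the padding + rearrange above
# F.linear computes out @ weight.T, i.e. (200, 441) @ (200, 441):
# F.linear(out, weight)           # RuntimeError: mat1 and mat2 shapes cannot be multiplied (200x441 and 200x441)
print(F.linear(torch.randn(50, 200), weight).shape)  # torch.Size([50, 441]): a (batch_size, z_dim) input works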

Any suggestion to fix this error?

1 comment

红尘作伴 2025-01-18 10:03:23


Matrix multiplication requires the two inner dimensions to be the same. You are getting the error RuntimeError: mat1 and mat2 shapes cannot be multiplied (200x441 and 200x441) because your inner dimensions don't line up.

For example:

shape(200, 441) * shape(441, 200) # works
shape(441, 200) * shape(200, 441) # works
shape(200, 441) * shape(200, 441) # doesn't work, this is why you are getting your error

# in general
shape(x, y) * shape(y, z) # works
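In PyTorch terms, the same rule looks like this (a small illustrative sketch with random tensors, not the code from the question):

import torch

a = torch.randn(200, 441)
b = torch.randn(441, 200)
print(torch.mm(a, b).shape)   # torch.Size([200, 200]) -- inner dimensions 441 and 441 match
print(torch.mm(b, a).shape)   # torch.Size([441, 441]) -- inner dimensions 200 and 200 match
# torch.mm(a, a)              # RuntimeError: mat1 and mat2 shapes cannot be multiplied (200x441 and 200x441)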

To make the inner dimensions match, just take the transpose of one or the other:

shape(200, 441) * shape(200, 441).T # works
# or
shape(200, 441).T * shape(200, 441) # works

# since the transpose works by swapping the dimensions:
shape(200, 441).T = shape(441, 200)
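Applied to the code in the question, that would mean transposing the rearranged out (shape (200, 441)) before passing it to the decoder, so that its last dimension matches the in_features (200) of px_z_fc_0. This is only a sketch of the shape fix; whether a batch of 441 vectors of size 200 is the layout you actually intend is a separate modelling question:

x_samples = self.decoder(out.T)   # out.T has shape (441, 200): a batch of 441 vectors of size 200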