PyTorch nn.CrossEntropyLoss() always returns 0
I am building a multi-class Vision Transformer network. When passing my values through my loss function, it always returns zero. My output layer consists of 37 dense layers, each with a sigmoid unit on top. criterion is created with nn.CrossEntropyLoss(). The output of criterion is 0.0 for every iteration. I am using a Colab notebook. I printed the output and label for one iteration:
for output, label in zip(iter(outputs_t), iter(labels_t)):
    loss += criterion(
        output,
        # reshape label from (batch_size,) to (batch_size, 1)
        torch.reshape(label, (label.shape[0], 1))
    )
output: tensor([[0.1534],
[0.5797],
[0.6554],
[0.4066],
[0.2683],
[0.1773],
[0.7410],
[0.5136],
[0.5695],
[0.3970],
[0.4317],
[0.7216],
[0.8336],
[0.4517],
[0.4004],
[0.5963],
[0.3079],
[0.5956],
[0.3876],
[0.2327],
[0.7919],
[0.2722],
[0.3064],
[0.9779],
[0.8358],
[0.1851],
[0.2869],
[0.3128],
[0.4301],
[0.4740],
[0.6689],
[0.7588]], device='cuda:0', grad_fn=<UnbindBackward0>)
label: tensor([[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[1.],
[1.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[1.],
[0.]], device='cuda:0')
My Model:
class vit_large_patch16_224_multiTaskNet(nn.Module):
    def __init__(self, output_classes, frozen_feature_layers=False):
        super().__init__()
        vit_base_patch16_224 = timm.create_model('vit_large_patch16_224', pretrained=True)
        self.is_frozen = frozen_feature_layers
        # here we get all the modules (layers) before the fc layer at the end
        self.features = nn.ModuleList(vit_base_patch16_224.children())[:-1]
        self.features = nn.Sequential(*self.features)
        if frozen_feature_layers:
            self.freeze_feature_layers()
        # now let's add our new layers
        in_features = vit_base_patch16_224.head.in_features
        # it helps with performance. you can play with it,
        # create more layers, play/experiment with them.
        self.fc0 = nn.Linear(in_features, 512)
        self.bn_pu = nn.BatchNorm1d(512, eps=1e-5)
        self.output_modules = nn.ModuleList()
        for i in range(output_classes):
            self.output_modules.append(nn.Linear(512, 1))
        # initialize all fc layers to xavier
        for m in self.modules():
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_normal_(m.weight, gain=1)

    def forward(self, input_imgs):
        output = self.features(input_imgs)
        final_cs_token = output[:, 0]
        output = self.bn_pu(F.relu(self.fc0(final_cs_token)))
        output_list = list()
        for output_modul in self.output_modules:
            output_list.append(torch.sigmoid(output_modul(output)))
        # convert list to tensor: (output_classes, batch_size, 1)
        output_tensor = torch.stack(output_list)
        # -> (batch_size, output_classes, 1)
        output_tensor = torch.swapaxes(output_tensor, 0, 1)
        return output_tensor

    def _set_freeze_(self, status):
        for n, p in self.features.named_parameters():
            p.requires_grad = status
        # for m in self.features.children():
        #     for p in m.parameters():
        #         p.requires_grad = status

    def freeze_feature_layers(self):
        self._set_freeze_(False)

    def unfreeze_feature_layers(self):
        self._set_freeze_(True)
1 Answer
You are in a multi-label classification scenario, which means you can consider your problem as c binary classification tasks done in parallel (where c is the total number of classes). That is also why your loss is constantly zero: you call nn.CrossEntropyLoss() on one task at a time, so each input has shape (batch_size, 1), and the softmax over a single logit is always 1, whose logarithm is always 0, no matter what the model predicts.

Instead, let output_t be the logit tensor containing the values output by your model's last linear layer (without the torch.sigmoid) and target the ground-truth tensor containing the true class states for each instance in the batch. You can apply nn.BCEWithLogitsLoss, since it works with multi-dimensional tensors out of the box.

With dummy inputs:
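A minimal sketch, assuming a batch size of 32 and c = 37 to mirror the question; output_t and target are the names used above, filled with random placeholder values:

    import torch
    import torch.nn as nn

    n, c = 32, 37                              # batch size, number of classes
    output_t = torch.randn(n, c)               # raw logits from the last linear layer
    target = torch.randint(2, (n, c)).float()  # binary ground truth, same shape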
Then initializing and calling the loss function:
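Continuing the sketch under the same assumptions:

    criterion = nn.BCEWithLogitsLoss()
    loss = criterion(output_t, target)  # one call covers all 37 binary tasks
    print(loss)                         # a non-zero scalar for these random inputs

    # For comparison, the failure mode from the question: cross-entropy over a
    # single-logit column is always zero, because softmax over one element is 1
    # and log(1) = 0. (Float targets of the same shape need PyTorch >= 1.10.)
    ce = nn.CrossEntropyLoss()
    print(ce(torch.randn(n, 1), torch.randint(2, (n, 1)).float()))  # always 0

To wire this into the model above, you would drop the torch.sigmoid call in forward (nn.BCEWithLogitsLoss applies the sigmoid internally) and squeeze the trailing dimension, so the output has shape (batch_size, 37) and matches the stacked labels.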