带有Pytorch Lightning问题的Electra序列分类与' pooler_output'
我正在处理一个句子分类任务,每个句子都附有多个二进制标签。我正在使用Electra和Pytorch Lightning来完成这项工作,但是我遇到了一个问题。当我运行Trainer.fit(模型,数据)
时,我会收到以下错误:
attributeError:'tuple'对象没有属性'pooler_output'
该错误是引用的要在我定义pl的部分中的第13行。LightningModule
:
class CrowdCodedTagger(pl.LightningModule):
def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
super().__init__()
self.electra = ElectraModel.from_pretrained(ELECTRA_MODEL_NAME, return_dict=False) #changed ElectraModel to ElectraForSequenceClassification
self.classifier = nn.Linear(self.electra.config.hidden_size, n_classes)
self.n_training_steps = n_training_steps
self.n_warmup_steps = n_warmup_steps
self.criterion = nn.BCELoss()
def forward(self, input_ids, attention_mask, labels=None):
output = self.electra(input_ids, attention_mask=attention_mask)
output = self.classifier(output.pooler_output) # <---- this is the line the error is referring to.
output = torch.sigmoid(output)
loss = 0
if labels is not None:
loss = self.criterion(output, labels)
return loss, output
def training_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
self.log("train_loss", loss, prog_bar=True, logger=True)
return {"loss": loss, "predictions": outputs, "labels": labels}
def validation_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
self.log("val_loss", loss, prog_bar=True, logger=True)
return loss
def test_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
self.log("test_loss", loss, prog_bar=True, logger=True)
return loss
def training_epoch_end(self, outputs):
labels = []
predictions = []
for output in outputs:
for out_labels in output["labels"].detach().cpu():
labels.append(out_labels)
for out_predictions in output["predictions"].detach().cpu():
predictions.append(out_predictions)
labels = torch.stack(labels).int()
predictions = torch.stack(predictions)
for i, name in enumerate(LABEL_COLUMNS):
class_roc_auc = auroc(predictions[:, i], labels[:, i])
self.logger.experiment.add_scalar(f"{name}_roc_auc/Train", class_roc_auc, self.current_epoch)
def configure_optimizers(self):
optimizer = AdamW(self.parameters(), lr=2e-5)
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=self.n_warmup_steps,
num_training_steps=self.n_training_steps
)
return dict(
optimizer=optimizer,
lr_scheduler=dict(
scheduler=scheduler,
interval='step'
)
)
有人可以指向我的方向来解决错误吗?
数据结构的示例(在CSV中):
sentence label_1 label_2 label_3
Lorem ipsum dolor sit amet 1 0 1
consectetur adipiscing elit 0 0 0
sed do eiusmod tempor 0 1 1
incididunt ut labore et 1 0 0
Lorem ipsum dolor sit amet 1 0 1
I'm working on a sentence classification task with multiple binary labels attached to each sentence. I'm using Electra and pytorch lightning to do the job, but I've run into a problem. When I'm running the trainer.fit(model, data)
I get the following error:
AttributeError: 'tuple' object has no attribute 'pooler_output'
The error is referring to line 13 in the section where I'm defining pl.LightningModule
:
class CrowdCodedTagger(pl.LightningModule):
def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
super().__init__()
self.electra = ElectraModel.from_pretrained(ELECTRA_MODEL_NAME, return_dict=False) #changed ElectraModel to ElectraForSequenceClassification
self.classifier = nn.Linear(self.electra.config.hidden_size, n_classes)
self.n_training_steps = n_training_steps
self.n_warmup_steps = n_warmup_steps
self.criterion = nn.BCELoss()
def forward(self, input_ids, attention_mask, labels=None):
output = self.electra(input_ids, attention_mask=attention_mask)
output = self.classifier(output.pooler_output) # <---- this is the line the error is referring to.
output = torch.sigmoid(output)
loss = 0
if labels is not None:
loss = self.criterion(output, labels)
return loss, output
def training_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
self.log("train_loss", loss, prog_bar=True, logger=True)
return {"loss": loss, "predictions": outputs, "labels": labels}
def validation_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
self.log("val_loss", loss, prog_bar=True, logger=True)
return loss
def test_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
self.log("test_loss", loss, prog_bar=True, logger=True)
return loss
def training_epoch_end(self, outputs):
labels = []
predictions = []
for output in outputs:
for out_labels in output["labels"].detach().cpu():
labels.append(out_labels)
for out_predictions in output["predictions"].detach().cpu():
predictions.append(out_predictions)
labels = torch.stack(labels).int()
predictions = torch.stack(predictions)
for i, name in enumerate(LABEL_COLUMNS):
class_roc_auc = auroc(predictions[:, i], labels[:, i])
self.logger.experiment.add_scalar(f"{name}_roc_auc/Train", class_roc_auc, self.current_epoch)
def configure_optimizers(self):
optimizer = AdamW(self.parameters(), lr=2e-5)
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=self.n_warmup_steps,
num_training_steps=self.n_training_steps
)
return dict(
optimizer=optimizer,
lr_scheduler=dict(
scheduler=scheduler,
interval='step'
)
)
Can anyone point me in a direction to fix the error?
EXAMPLE OF DATA STRUCTURE (in CSV):
sentence label_1 label_2 label_3
Lorem ipsum dolor sit amet 1 0 1
consectetur adipiscing elit 0 0 0
sed do eiusmod tempor 0 1 1
incididunt ut labore et 1 0 0
Lorem ipsum dolor sit amet 1 0 1
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
data:image/s3,"s3://crabby-images/d5906/d59060df4059a6cc364216c4d63ceec29ef7fe66" alt="扫码二维码加入Web技术交流群"
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
electra 没有 bert (比较返回部分以获取更多信息)。
如果您只想将[Cls]令牌用于序列分类,则可以简单地获取Last_hidden_state的第一个元素(初始化electra没有
return> return_dict = false
):ELECTRA has no pooler layer like BERT (compare the return section for further information).
In case you only want to use the [CLS] token for your sequence classification, you can simply take the first element of the last_hidden_state (initialize electra without
return_dict=False
):