TypeError: forward() got an unexpected keyword argument 'return_dict' (BERT classification with Hugging Face and Ax tuning)
I'm stuck on this model; every day new errors appear in my code! I'm trying to implement a BERT classifier to discriminate between two sequence classes (binary classification), with Ax hyperparameter tuning. Below is all my code, preceded by a sample of my dataset (I have 3 CSVs: train, test, val). Thank you very much!
df_train = pd.read_csv('CLASSIFIER_train', sep=',', header=None)
df_train
0 1
M A T T D R P T P D G T D A I D L T T R V R R... 1
M K K L F Q T E P L L E L F N C N E L R I I G... 0
M L V A A A V C P H P P L L I P E L A A G A A... 1
M I V A W G N S G S G L L I L I L S L A V S A... 0
M V E E G R R L A A L H P N I V V K L P T T E... 1
M G S K V S K N A L V F N V L Q A L R E G L T... 1
M P S K E T S P A E R M A R D E Y Y M R L A M... 1
M V K E Y A L E W I D G Y R E R L V K V S D A... 1
M G T A A S Q D R A A M A E A A Q R V G D S F... 0
class SequenceDataset(Dataset):
    def __init__(self, sequences, targets, tokenizer, max_len):
        self.sequences = sequences
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, item):
        sequences = str(self.sequences[item])
        target = self.targets[item]
        encoding = self.tokenizer.encode_plus(
            sequences,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'sequences_text': sequences,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(target, dtype=torch.long)
        }
def create_data_loader(df, tokenizer, max_len, batch_size):
    ds = SequenceDataset(
        sequences=df[0].to_numpy(),
        targets=df[1].to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=2,
        shuffle=True
    )
def net_train(net, train_data_loader, parameters, dtype, device):
    net.to(dtype=dtype, device=device)
    # Define loss and optimizer
    #criterion = nn.CrossEntropyLoss()
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(net.parameters(),  # or any optimizer you prefer
                          lr=parameters.get("lr", 0.001),  # 0.001 is used if no lr is specified
                          momentum=parameters.get("momentum", 0.9))
    scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=int(parameters.get("step_size", 30)),
        gamma=parameters.get("gamma", 1.0),  # default is no learning rate decay
    )
    num_epochs = parameters.get("num_epochs", 3)  # Play around with epoch number
    # Train Network
    for _ in range(num_epochs):
        # The dataloader returns a dictionary, so access the batch as such
        for batch in train_data_loader:
            # move data to the proper dtype and device
            labels = batch['targets'].to(device=device)
            attention_mask = batch['attention_mask'].to(device=device)
            input_ids = batch['input_ids'].to(device=device)
            #labels = labels.long()
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs, x = net(input_ids, attention_mask, return_dict=True)
            #outputs, x = net(input_ids, atten_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        scheduler.step()
    return net
class BERT_Arch(nn.Module):
    def __init__(self, bert):
        super(BERT_Arch, self).__init__()
        self.bert = bert
        # dropout layer
        self.dropout = nn.Dropout(0.1)
        # relu activation function
        self.relu = nn.ReLU()
        # dense layer 1
        self.fc1 = nn.Linear(1024, 512)
        # dense layer 2 (output layer)
        self.fc2 = nn.Linear(512, 1)
        # softmax activation function
        self.softmax = nn.LogSoftmax(dim=1)

    # define the forward pass
    def forward(self, input_ids, attention_mask):
        # pass the inputs to the model
        _, cls_hs = self.bert(input_ids, attention_mask, return_dict=False)
        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)
        # output layer
        x = self.fc2(x)
        # apply softmax activation
        x = self.softmax(x)
        return x
from transformers import AutoModel
# import BERT-base pretrained model
bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
from transformers.models.bert.modeling_bert import BertForSequenceClassification
def init_net(parameterization):
    model = BERT_Arch(bert)  # wrap the pretrained BERT encoder
    # push the model to GPU
    model = model.to(device)
    # The depth of unfreezing is also a hyperparameter
    for param in model.parameters():
        param.requires_grad = False  # Freeze feature extractor
    return model  # return untrained model
def train_evaluate(parameterization):
    # constructing a new training data loader allows us to tune the batch size
    train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN,
                                           batch_size=parameterization.get("batchsize", 32))
    # Get neural net
    untrained_net = init_net(parameterization)
    # train
    trained_net = net_train(net=untrained_net, train_data_loader=train_data_loader,
                            parameters=parameterization, dtype=dtype, device=device)
    # return the accuracy of the model as it was trained in this run
    return evaluate(
        net=trained_net,
        data_loader=test_data_loader,
        dtype=dtype,
        device=device,
    )
dtype = torch.float
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

best_parameters, values, experiment, model = optimize(
    parameters=[
        {"name": "lr", "type": "range", "bounds": [1e-6, 0.4], "log_scale": True},
        {"name": "batchsize", "type": "range", "bounds": [16, 128]},
        {"name": "momentum", "type": "range", "bounds": [0.0, 1.0]},
        #{"name": "max_epoch", "type": "range", "bounds": [1, 30]},
        #{"name": "stepsize", "type": "range", "bounds": [20, 40]},
    ],
    evaluation_function=train_evaluate,
    objective_name='accuracy',
)
print(best_parameters)
means, covariances = values
print(means)
print(covariances)
File "<ipython-input-61-aa60b2f44317>", line 35, in net_train
outputs,x= net(input_ids, attention_mask,return_dict=True)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
TypeError: forward() got an unexpected keyword argument 'return_dict'
1 Answer
In net_train you call:

    outputs, x = net(input_ids, attention_mask, return_dict=True)

but your object net only accepts two arguments besides self, as forward is defined in BERT_Arch:

    def forward(self, input_ids, attention_mask):

You probably want to add an additional parameter and use it in the forward pass:
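A minimal sketch of that fix, assuming the rest of the question's code stays as posted: forward() gains a return_dict keyword so the existing call in net_train no longer raises a TypeError, while the internal BertModel call keeps return_dict=False so the tuple unpacking still works. One further change, flagged explicitly: fc2 is widened here to 2 output units, since NLLLoss with labels in {0, 1} needs one log-probability per class (the posted nn.Linear(512, 1) would fail as soon as a label of 1 appears).

    import torch.nn as nn

    class BERT_Arch(nn.Module):
        def __init__(self, bert):
            super(BERT_Arch, self).__init__()
            self.bert = bert
            self.dropout = nn.Dropout(0.1)
            self.relu = nn.ReLU()
            self.fc1 = nn.Linear(1024, 512)  # 1024 assumes a BERT-large hidden size
            self.fc2 = nn.Linear(512, 2)     # 2 units: one log-probability per class
            self.softmax = nn.LogSoftmax(dim=1)

        def forward(self, input_ids, attention_mask, return_dict=False):
            # accept the extra keyword so net(..., return_dict=True) is a valid call;
            # BertModel itself is still asked for a tuple so the unpacking works
            _, cls_hs = self.bert(input_ids, attention_mask, return_dict=False)
            x = self.dropout(self.relu(self.fc1(cls_hs)))
            x = self.fc2(x)
            return self.softmax(x)  # a single tensor of shape (batch, 2)

The simpler alternative is to drop return_dict=True from the call site, since BERT_Arch never uses it. Either way, note that this forward() returns a single tensor, so the call in net_train should read outputs = net(input_ids, attention_mask) rather than unpacking two values.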