TypeError: forward() got an unexpected keyword argument 'return_dict' (BERT classification with Hugging Face and Ax tuning)
I'm stuck on this model; every day new errors appear in my code! I'm trying to implement a BERT classifier to discriminate between two sequence classes (binary classification), with Ax hyperparameter tuning. Below is all my code, preceded by a sample of my dataset (I have 3 CSVs: train, test, val). Thank you very much!
df_train = pd.read_csv('CLASSIFIER_train', sep=',', header=None)
df_train
0 1
M A T T D R P T P D G T D A I D L T T R V R R... 1
M K K L F Q T E P L L E L F N C N E L R I I G... 0
M L V A A A V C P H P P L L I P E L A A G A A... 1
M I V A W G N S G S G L L I L I L S L A V S A... 0
M V E E G R R L A A L H P N I V V K L P T T E... 1
M G S K V S K N A L V F N V L Q A L R E G L T... 1
M P S K E T S P A E R M A R D E Y Y M R L A M... 1
M V K E Y A L E W I D G Y R E R L V K V S D A... 1
M G T A A S Q D R A A M A E A A Q R V G D S F... 0
class SequenceDataset(Dataset):
    def __init__(self, sequences, targets, tokenizer, max_len):
        self.sequences = sequences
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, item):
        sequences = str(self.sequences[item])
        target = self.targets[item]
        encoding = self.tokenizer.encode_plus(
            sequences,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'sequences_text': sequences,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(target, dtype=torch.long)
        }
def create_data_loader(df, tokenizer, max_len, batch_size):
    ds = SequenceDataset(
        sequences=df[0].to_numpy(),
        targets=df[1].to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=2,
        shuffle=True
    )
def net_train(net, train_data_loader, parameters, dtype, device):
    net.to(dtype=dtype, device=device)
    # Define loss and optimizer
    #criterion = nn.CrossEntropyLoss()
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(net.parameters(),  # or any optimizer you prefer
                          lr=parameters.get("lr", 0.001),  # 0.001 is used if no lr is specified
                          momentum=parameters.get("momentum", 0.9))
    scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=int(parameters.get("step_size", 30)),
        gamma=parameters.get("gamma", 1.0),  # default is no learning rate decay
    )
    num_epochs = parameters.get("num_epochs", 3)  # Play around with epoch number
    # Train Network
    for _ in range(num_epochs):
        # The dataloader returns a dictionary, so access the batch as such
        for batch in train_data_loader:
            # move data to the proper dtype and device
            labels = batch['targets'].to(device=device)
            attention_mask = batch['attention_mask'].to(device=device)
            input_ids = batch['input_ids'].to(device=device)
            #labels = labels.long()
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs, x = net(input_ids, attention_mask, return_dict=True)
            #outputs, x = net(input_ids, atten_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        scheduler.step()
    return net
class BERT_Arch(nn.Module):
    def __init__(self, bert):
        super(BERT_Arch, self).__init__()
        self.bert = bert
        # dropout layer
        self.dropout = nn.Dropout(0.1)
        # relu activation function
        self.relu = nn.ReLU()
        # dense layer 1
        self.fc1 = nn.Linear(1024, 512)
        # dense layer 2 (output layer)
        self.fc2 = nn.Linear(512, 1)
        # softmax activation function
        self.softmax = nn.LogSoftmax(dim=1)

    # define the forward pass
    def forward(self, input_ids, attention_mask):
        # pass the inputs to the model
        _, cls_hs = self.bert(input_ids, attention_mask, return_dict=False)
        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)
        # output layer
        x = self.fc2(x)
        # apply softmax activation
        x = self.softmax(x)
        return x
from transformers import AutoModel
# import BERT-base pretrained model
bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
from transformers.models.bert.modeling_bert import BertForSequenceClassification
def init_net(parameterization):
    model = BERT_Arch(bert)  # wrap the pretrained BERT encoder
    # push the model to GPU
    model = model.to(device)
    # The depth of unfreezing is also a hyperparameter
    for param in model.parameters():
        param.requires_grad = False  # Freeze feature extractor
    return model  # return untrained model
def train_evaluate(parameterization):
    # constructing a new training data loader allows us to tune the batch size
    train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN,
                                           batch_size=parameterization.get("batchsize", 32))
    # Get neural net
    untrained_net = init_net(parameterization)
    # train
    trained_net = net_train(net=untrained_net, train_data_loader=train_data_loader,
                            parameters=parameterization, dtype=dtype, device=device)
    # return the accuracy of the model as it was trained in this run
    return evaluate(
        net=trained_net,
        data_loader=test_data_loader,
        dtype=dtype,
        device=device,
    )
dtype = torch.float
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

best_parameters, values, experiment, model = optimize(
    parameters=[
        {"name": "lr", "type": "range", "bounds": [1e-6, 0.4], "log_scale": True},
        {"name": "batchsize", "type": "range", "bounds": [16, 128]},
        {"name": "momentum", "type": "range", "bounds": [0.0, 1.0]},
        #{"name": "max_epoch", "type": "range", "bounds": [1, 30]},
        #{"name": "stepsize", "type": "range", "bounds": [20, 40]},
    ],
    evaluation_function=train_evaluate,
    objective_name='accuracy',
)
print(best_parameters)
means, covariances = values
print(means)
print(covariances)
File "<ipython-input-61-aa60b2f44317>", line 35, in net_train
outputs,x= net(input_ids, attention_mask,return_dict=True)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
TypeError: forward() got an unexpected keyword argument 'return_dict'
1 Answer
In net_train you call:

    outputs, x = net(input_ids, attention_mask, return_dict=True)

but your object net only accepts two arguments besides self, as forward is defined in BERT_Arch:

    def forward(self, input_ids, attention_mask):

You probably want to add an additional parameter and use it in the forward pass:
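A minimal sketch of that fix, assuming the rest of the question's code stays as posted: forward() gains a return_dict keyword so the existing call in net_train no longer raises a TypeError, while the internal BertModel call keeps return_dict=False so the tuple unpacking still works. One further change, flagged explicitly: fc2 is widened here to 2 output units, since NLLLoss with labels in {0, 1} needs one log-probability per class (the posted nn.Linear(512, 1) would fail as soon as a label of 1 appears).

    import torch.nn as nn

    class BERT_Arch(nn.Module):
        def __init__(self, bert):
            super(BERT_Arch, self).__init__()
            self.bert = bert
            self.dropout = nn.Dropout(0.1)
            self.relu = nn.ReLU()
            self.fc1 = nn.Linear(1024, 512)  # 1024 assumes a BERT-large hidden size
            self.fc2 = nn.Linear(512, 2)     # 2 units: one log-probability per class
            self.softmax = nn.LogSoftmax(dim=1)

        def forward(self, input_ids, attention_mask, return_dict=False):
            # accept the extra keyword so net(..., return_dict=True) is a valid call;
            # BertModel itself is still asked for a tuple so the unpacking works
            _, cls_hs = self.bert(input_ids, attention_mask, return_dict=False)
            x = self.dropout(self.relu(self.fc1(cls_hs)))
            x = self.fc2(x)
            return self.softmax(x)  # a single tensor of shape (batch, 2)

The simpler alternative is to drop return_dict=True from the call site, since BERT_Arch never uses it. Either way, note that this forward() returns a single tensor, so the call in net_train should read outputs = net(input_ids, attention_mask) rather than unpacking two values.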