RuntimeError: each element in list of batch should be of equal size (BERT)
I want to train a BERT model, but a RuntimeError occurs and I don't know how to solve it. Please give me some help.
I run the code with Python 3.8.10 and PyTorch 1.8.0, and I use the IMDB dataset. I already tried changing library versions and checking the data; the samples in the dataset have variable lengths. How should I handle variable-length data? Please give me some tips.
Here is my code.
def pretraining(
    model: MLMandNSPmodel,
    model_name: str,
    train_dataset: PretrainDataset,
    val_dataset: PretrainDataset,
):
    # Below options are just our recommendation. You can choose different options if you want.
    batch_size = 8
    learning_rate = 1e-4
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    epochs = 200  # 200 if you want to feel the effect of pretraining
    steps_per_a_epoch: int = 2000
    steps_for_val: int = 200

    ### YOUR CODE HERE
    # pretraining(model, model_name, train_dataset, val_dataset)
    MLM_train_losses: List[float] = []
    MLM_val_losses: List[float] = []
    NSP_train_losses: List[float] = []
    NSP_val_losses: List[float] = []

    print('')
    print(train_dataset)
    print(val_dataset)
    print('')

    train_data_iterator = iter(
        torch.utils.data.dataloader.DataLoader(train_dataset, batch_size=batch_size, num_workers=2, shuffle=False))
    # train_data_iterator = torch.utils.data.dataloader.DataLoader(train_dataset, batch_size=batch_size, num_workers=2, shuffle=False)
    eval_data_iterator = iter(
        torch.utils.data.dataloader.DataLoader(val_dataset, batch_size=batch_size, num_workers=2, shuffle=False))
    # eval_data_iterator = torch.utils.data.dataloader.DataLoader(val_dataset, batch_size=batch_size, num_workers=2, shuffle=False)

    loss_log = tqdm(total=0, bar_format='{desc}')
    i = 0
    for epoch in trange(epochs, desc="Epoch", position=0):
        i += 1

        # Run batches for 'steps_per_a_epoch' times
        MLM_loss = 0
        NSP_loss = 0
        model.train()
        for step in trange(steps_per_a_epoch, desc="Training steps"):
            optimizer.zero_grad()
            src, mlm, mask, nsp = next(train_data_iterator)
            mlm_loss, nsp_loss = calculate_losses(model, src, mlm, mask, nsp)
            MLM_loss += mlm_loss
            NSP_loss += nsp_loss
            loss = mlm_loss + nsp_loss
            loss.backward()
            optimizer.step()
            des = 'Loss: {:06.4f}'.format(loss.cpu())
            loss_log.set_description_str(des)

        # Calculate training loss
        MLM_loss = MLM_loss / steps_per_a_epoch
        NSP_loss = NSP_loss / steps_per_a_epoch
        MLM_train_losses.append(float(MLM_loss.data))
        NSP_train_losses.append(float(NSP_loss.data))

        # Calculate valid loss
        model.eval()
        valid_mlm_loss = 0.
        valid_nsp_loss = 0.
        for step in trange(steps_for_val, desc="Evaluation steps"):
            src, mlm, mask, nsp = next(eval_data_iterator)
            mlm_loss, nsp_loss = calculate_losses(model, src, mlm, mask, nsp)
            valid_mlm_loss += mlm_loss
            valid_nsp_loss += nsp_loss
        valid_mlm_loss = valid_mlm_loss / steps_for_val
        valid_nsp_loss = valid_nsp_loss / steps_for_val
        MLM_val_losses.append(float(valid_mlm_loss.data))
        NSP_val_losses.append(float(valid_nsp_loss.data))

        torch.save(model.state_dict(), os.path.join('/home/ml/Desktop/song/HW3/hw3/', model_name + str(i) + '.pth'))
    ### END YOUR CODE

    assert len(MLM_train_losses) == len(MLM_val_losses) == epochs and \
        len(NSP_train_losses) == len(NSP_val_losses) == epochs
    assert all(isinstance(loss, float) for loss in MLM_train_losses) and \
        all(isinstance(loss, float) for loss in MLM_val_losses) and \
        all(isinstance(loss, float) for loss in NSP_train_losses) and \
        all(isinstance(loss, float) for loss in NSP_val_losses)

    return MLM_train_losses, MLM_val_losses, NSP_train_losses, NSP_val_losses
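When I checked the data, I just printed the first few items of train_dataset (inside pretraining, right after the two print calls). The field names src/mlm/mask/nsp are simply how I already unpack the batch; I am assuming each of them is a 1-D tensor of token ids:

for idx in range(3):
    src, mlm, mask, nsp = train_dataset[idx]
    # the sequence lengths differ from item to item
    print(idx, len(src), len(mlm), len(mask), nsp)

The lengths are different for every item, so I think this is what the error message means by elements of unequal size.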
And this is the error message:
(hw3) ml@automl03:~/Desktop/song/HW3/hw3$ python pretrain.py
======MLM & NSP Pretraining======
<__main__.PretrainDataset object at 0x7fa72117eb80>
<__main__.PretrainDataset object at 0x7fa70d0a2a30>
<torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7fa70c8b3640>
<torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7fa70c8b3670>
Training steps: 0%| | 0/2000 [00:00<?, ?it/s]
Epoch: 0%| | 0/200 [00:00<?, ?it/s]
Traceback (most recent call last):
File "pretrain.py", line 527, in <module>
pretrain_model()
File "pretrain.py", line 505, in pretrain_model
= pretraining(model, model_name, train_dataset, val_dataset)
File "pretrain.py", line 316, in pretraining
src, mlm, mask, nsp = next(train_data_iterator)
File "/home/ml/anaconda3/envs/hw3/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 517, in __next__
data = self._next_data()
File "/home/ml/anaconda3/envs/hw3/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1199, in _next_data
return self._process_data(data)
File "/home/ml/anaconda3/envs/hw3/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1225, in _process_data
data.reraise()
File "/home/ml/anaconda3/envs/hw3/lib/python3.8/site-packages/torch/_utils.py", line 429, in reraise
raise self.exc_type(msg)
RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/ml/anaconda3/envs/hw3/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 202, in _worker_loop
data = fetcher.fetch(index)
File "/home/ml/anaconda3/envs/hw3/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 35, in fetch
return self.collate_fn(data)
File "/home/ml/anaconda3/envs/hw3/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 83, in default_collate
return [default_collate(samples) for samples in transposed]
File "/home/ml/anaconda3/envs/hw3/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 83, in <listcomp>
return [default_collate(samples) for samples in transposed]
File "/home/ml/anaconda3/envs/hw3/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 81, in default_collate
raise RuntimeError('each element in list of batch should be of equal size')
RuntimeError: each element in list of batch should be of equal size
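From the traceback, I guess that default_collate fails because PretrainDataset returns sequences with different lengths, so they cannot be stacked into one tensor. Would a custom collate_fn that pads every field to the longest sequence in the batch be the right way to handle the variable data? This is only a rough sketch of what I have in mind, not something I have tested; the name pad_collate, padding_value=0, and the assumption that src, mlm and mask are 1-D LongTensors (and nsp a scalar label) are all my own guesses:

import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

def pad_collate(batch):
    # batch is a list of (src, mlm, mask, nsp) tuples from PretrainDataset
    srcs, mlms, masks, nsps = zip(*batch)
    # pad the variable-length fields to the longest sequence in this batch
    src = pad_sequence(srcs, batch_first=True, padding_value=0)
    mlm = pad_sequence(mlms, batch_first=True, padding_value=0)
    mask = pad_sequence(masks, batch_first=True, padding_value=0)
    # stack the per-sample NSP labels into one tensor
    nsp = torch.stack([torch.as_tensor(n) for n in nsps])
    return src, mlm, mask, nsp

# then pass it to the DataLoader, e.g.
# train_loader = DataLoader(train_dataset, batch_size=8, num_workers=2,
#                           shuffle=False, collate_fn=pad_collate)

Is this the right direction, or should I rather pad/truncate everything to a fixed maximum length inside PretrainDataset.__getitem__?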