ValueError:无法创建张量,您可能应该使用' padding = true'

发布于 2025-01-18 22:37:05 字数 7635 浏览 2 评论 0原文

我正在尝试评估facebook/hubert-base-ls9601在私人数据集上进行微调后,拥抱面前训练的模型。

我正在使用facebook/hubert-base-ls9601预先训练的模型和wav2Vec2功能提取器,并将池模式设置为Means

这是评估代码:

test_dataset = load_dataset("csv", data_files={"test": "/content/drive/MyDrive/freelancing/test.csv"}, delimiter="\t")["test"]

def speech_file_to_array_fn(batch):
    speech_array, sampling_rate = torchaudio.load(batch["path"])
    resampler = torchaudio.transforms.Resample(sampling_rate, target_sampling_rate)
    speech = resampler(speech_array).squeeze().numpy()
    batch["speech"] = speech_array
    return batch


def predict(batch):
    features = feature_extractor(batch["speech"], sampling_rate=feature_extractor.sampling_rate, return_tensors="pt", padding=True)

    input_values = features.input_values.to(device)

    with torch.no_grad():
        logits = model(input_values).logits 

    pred_ids = torch.argmax(logits, dim=-1).detach().cpu().numpy()
    batch["predicted"] = pred_ids
    return batch

test_dataset = test_dataset.map(speech_file_to_array_fn)
result = test_dataset.map(predict, batched=True, batch_size=2)

在最后一行,我遇到以下错误块:

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in convert_to_tensors(self, tensor_type)
    168                 if not is_tensor(value):
--> 169                     tensor = as_tensor(value)
    170 

ValueError: could not broadcast input array from shape (2,220683) into shape (2,)


During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)

12 frames

<ipython-input-73-7bd88adad349> in <module>()
----> 1 result = test_dataset.map(predict, batched=True, batch_size=2)

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in map(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)
   1970                 new_fingerprint=new_fingerprint,
   1971                 disable_tqdm=disable_tqdm,
-> 1972                 desc=desc,
   1973             )
   1974         else:

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in wrapper(*args, **kwargs)
    517             self: "Dataset" = kwargs.pop("self")
    518         # apply actual function
--> 519         out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
    520         datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [out]
    521         for dataset in datasets:

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in wrapper(*args, **kwargs)
    484         }
    485         # apply actual function
--> 486         out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
    487         datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [out]
    488         # re-apply format to the output

/usr/local/lib/python3.7/dist-packages/datasets/fingerprint.py in wrapper(*args, **kwargs)
    456             # Call actual function
    457 
--> 458             out = func(self, *args, **kwargs)
    459 
    460             # Update fingerprint of in-place transforms + update in-place history of transforms

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in _map_single(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, new_fingerprint, rank, offset, disable_tqdm, desc, cache_only)
   2340                                 indices,
   2341                                 check_same_num_examples=len(input_dataset.list_indexes()) > 0,
-> 2342                                 offset=offset,
   2343                             )
   2344                         except NumExamplesMismatchError:

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in apply_function_on_filtered_inputs(inputs, indices, check_same_num_examples, offset)
   2217             if with_rank:
   2218                 additional_args += (rank,)
-> 2219             processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
   2220             if update_data is None:
   2221                 # Check if the function returns updated examples

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in decorated(item, *args, **kwargs)
   1912                 )
   1913                 # Use the LazyDict internally, while mapping the function
-> 1914                 result = f(decorated_item, *args, **kwargs)
   1915                 # Return a standard dict
   1916                 return result.data if isinstance(result, LazyDict) else result

<ipython-input-71-6f845da29c00> in predict(batch)
     11 
     12 def predict(batch):
---> 13     features = feature_extractor(batch["speech"], sampling_rate=feature_extractor.sampling_rate, return_tensors="pt", padding=True)
     14 
     15     input_values = features.input_values.to(device)

/usr/local/lib/python3.7/dist-packages/transformers/models/wav2vec2/feature_extraction_wav2vec2.py in __call__(self, raw_speech, padding, max_length, truncation, pad_to_multiple_of, return_attention_mask, return_tensors, sampling_rate, **kwargs)
    200             truncation=truncation,
    201             pad_to_multiple_of=pad_to_multiple_of,
--> 202             return_attention_mask=return_attention_mask,
    203         )
    204 

/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_sequence_utils.py in pad(self, processed_features, padding, max_length, truncation, pad_to_multiple_of, return_attention_mask, return_tensors)
    230                 batch_outputs[key].append(value)
    231 
--> 232         return BatchFeature(batch_outputs, tensor_type=return_tensors)
    233 
    234     def _pad(

/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in __init__(self, data, tensor_type)
     78     def __init__(self, data: Optional[Dict[str, Any]] = None, tensor_type: Union[None, str, TensorType] = None):
     79         super().__init__(data)
---> 80         self.convert_to_tensors(tensor_type=tensor_type)
     81 
     82     def __getitem__(self, item: str) -> Union[Any]:

/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in convert_to_tensors(self, tensor_type)
    174                     raise ValueError("Unable to create tensor returning overflowing values of different lengths. ")
    175                 raise ValueError(
--> 176                     "Unable to create tensor, you should probably activate padding "
    177                     "with 'padding=True' to have batched tensors with the same length."
    178                 )

ValueError: Unable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.

我正在使用Google COLAB。这些是环境变量:

%env LC_ALL=C.UTF-8
%env LANG=C.UTF-8
%env TRANSFORMERS_CACHE=/content/cache
%env HF_DATASETS_CACHE=/content/cache
%env CUDA_LAUNCH_BLOCKING=1

填充已在预测函数中激活。

您能帮我解决吗?

I am trying to evaluate facebook/hubert-base-ls9601 Huggingface pre-trained model after fine-tuning on a private dataset.

I am using facebook/hubert-base-ls9601 pre-trained model, and Wav2vec2 feature extractor, and pooling mode set to mean.

Here's the evaluation code:

test_dataset = load_dataset("csv", data_files={"test": "/content/drive/MyDrive/freelancing/test.csv"}, delimiter="\t")["test"]

def speech_file_to_array_fn(batch):
    speech_array, sampling_rate = torchaudio.load(batch["path"])
    resampler = torchaudio.transforms.Resample(sampling_rate, target_sampling_rate)
    speech = resampler(speech_array).squeeze().numpy()
    batch["speech"] = speech_array
    return batch


def predict(batch):
    features = feature_extractor(batch["speech"], sampling_rate=feature_extractor.sampling_rate, return_tensors="pt", padding=True)

    input_values = features.input_values.to(device)

    with torch.no_grad():
        logits = model(input_values).logits 

    pred_ids = torch.argmax(logits, dim=-1).detach().cpu().numpy()
    batch["predicted"] = pred_ids
    return batch

test_dataset = test_dataset.map(speech_file_to_array_fn)
result = test_dataset.map(predict, batched=True, batch_size=2)

On the last line of code, I encounter the following error block:

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in convert_to_tensors(self, tensor_type)
    168                 if not is_tensor(value):
--> 169                     tensor = as_tensor(value)
    170 

ValueError: could not broadcast input array from shape (2,220683) into shape (2,)


During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)

12 frames

<ipython-input-73-7bd88adad349> in <module>()
----> 1 result = test_dataset.map(predict, batched=True, batch_size=2)

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in map(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)
   1970                 new_fingerprint=new_fingerprint,
   1971                 disable_tqdm=disable_tqdm,
-> 1972                 desc=desc,
   1973             )
   1974         else:

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in wrapper(*args, **kwargs)
    517             self: "Dataset" = kwargs.pop("self")
    518         # apply actual function
--> 519         out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
    520         datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [out]
    521         for dataset in datasets:

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in wrapper(*args, **kwargs)
    484         }
    485         # apply actual function
--> 486         out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
    487         datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [out]
    488         # re-apply format to the output

/usr/local/lib/python3.7/dist-packages/datasets/fingerprint.py in wrapper(*args, **kwargs)
    456             # Call actual function
    457 
--> 458             out = func(self, *args, **kwargs)
    459 
    460             # Update fingerprint of in-place transforms + update in-place history of transforms

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in _map_single(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, new_fingerprint, rank, offset, disable_tqdm, desc, cache_only)
   2340                                 indices,
   2341                                 check_same_num_examples=len(input_dataset.list_indexes()) > 0,
-> 2342                                 offset=offset,
   2343                             )
   2344                         except NumExamplesMismatchError:

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in apply_function_on_filtered_inputs(inputs, indices, check_same_num_examples, offset)
   2217             if with_rank:
   2218                 additional_args += (rank,)
-> 2219             processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
   2220             if update_data is None:
   2221                 # Check if the function returns updated examples

/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in decorated(item, *args, **kwargs)
   1912                 )
   1913                 # Use the LazyDict internally, while mapping the function
-> 1914                 result = f(decorated_item, *args, **kwargs)
   1915                 # Return a standard dict
   1916                 return result.data if isinstance(result, LazyDict) else result

<ipython-input-71-6f845da29c00> in predict(batch)
     11 
     12 def predict(batch):
---> 13     features = feature_extractor(batch["speech"], sampling_rate=feature_extractor.sampling_rate, return_tensors="pt", padding=True)
     14 
     15     input_values = features.input_values.to(device)

/usr/local/lib/python3.7/dist-packages/transformers/models/wav2vec2/feature_extraction_wav2vec2.py in __call__(self, raw_speech, padding, max_length, truncation, pad_to_multiple_of, return_attention_mask, return_tensors, sampling_rate, **kwargs)
    200             truncation=truncation,
    201             pad_to_multiple_of=pad_to_multiple_of,
--> 202             return_attention_mask=return_attention_mask,
    203         )
    204 

/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_sequence_utils.py in pad(self, processed_features, padding, max_length, truncation, pad_to_multiple_of, return_attention_mask, return_tensors)
    230                 batch_outputs[key].append(value)
    231 
--> 232         return BatchFeature(batch_outputs, tensor_type=return_tensors)
    233 
    234     def _pad(

/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in __init__(self, data, tensor_type)
     78     def __init__(self, data: Optional[Dict[str, Any]] = None, tensor_type: Union[None, str, TensorType] = None):
     79         super().__init__(data)
---> 80         self.convert_to_tensors(tensor_type=tensor_type)
     81 
     82     def __getitem__(self, item: str) -> Union[Any]:

/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in convert_to_tensors(self, tensor_type)
    174                     raise ValueError("Unable to create tensor returning overflowing values of different lengths. ")
    175                 raise ValueError(
--> 176                     "Unable to create tensor, you should probably activate padding "
    177                     "with 'padding=True' to have batched tensors with the same length."
    178                 )

ValueError: Unable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.

I am working on Google Colab. Those are the environment variables:

%env LC_ALL=C.UTF-8
%env LANG=C.UTF-8
%env TRANSFORMERS_CACHE=/content/cache
%env HF_DATASETS_CACHE=/content/cache
%env CUDA_LAUNCH_BLOCKING=1

The padding is already activated in the predict function.

Can you please help me fix it?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。
列表为空,暂无数据
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文