在Ubuntu中使用Python,培训停止,并进行描述:分割故障(核心倾倒)
我正在研究TTS和进行培训时,培训会停止信息分割故障(核心倾倒)
import os
# Trainer: Where the ✨️ happens.
# TrainingArgs: Defines the set of arguments of the Trainer.
from trainer import Trainer, TrainerArgs
# GlowTTSConfig: all model related values for training, validating and testing.
from TTS.tts.configs.tacotron2_config import Tacotron2Config
# BaseDatasetConfig: defines name, formatter and path of the dataset.
from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.tacotron2 import Tacotron2
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor
from TTS.config.shared_configs import BaseAudioConfig
# we use the same path as this script as our training folder.
output_path = os.path.dirname(os.path.abspath(__file__))
# DEFINE DATASET CONFIG
# Set LJSpeech as our target dataset and define its path.
# You can also use a simple Dict to define the dataset and pass it to your custom formatter.
dataset_config = BaseDatasetConfig(
name="ljspeech", meta_file_train="metadata.csv", path=os.path.join(output_path, "../LJSpeech-1.1/")
)
# INITIALIZE THE TRAINING CONFIGURATION
# Configure the model. Every config class inherits the BaseTTSConfig.
audio_config = BaseAudioConfig(
sample_rate=22050,
do_trim_silence=True,
trim_db=60.0,
signal_norm=False,
mel_fmin=0.0,
mel_fmax=8000,
spec_gain=1.0,
log_func="np.log",
ref_level_db=20,
preemphasis=0.0,
)
config = Tacotron2Config( # This is the config that is saved for the future use
audio=audio_config,
batch_size=4,
eval_batch_size=4,
num_loader_workers=2,
num_eval_loader_workers=2,
run_eval=True,
test_delay_epochs=-1,
r=6,
gradual_training=[[0, 6, 4], [10000, 4, 4], [50000, 3, 4], [100000, 2, 4]],
double_decoder_consistency=True,
epochs=1000,
text_cleaner="phoneme_cleaners",
use_phonemes=True,
phoneme_language="en-us",
phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
print_step=10,
print_eval=True,
mixed_precision=False,
output_path=output_path,
datasets=[dataset_config],
)
# INITIALIZE THE AUDIO PROCESSOR
# Audio processor is used for feature extraction and audio I/O.
# It mainly serves to the dataloader and the training loggers.
ap = AudioProcessor.init_from_config(config)
# INITIALIZE THE TOKENIZER
# Tokenizer is used to convert text to sequences of token IDs.
# If characters are not defined in the config, default characters are passed to the config
tokenizer, config = TTSTokenizer.init_from_config(config)
# LOAD DATA SAMPLES
# Each sample is a list of ```[text, audio_file_path, speaker_name]```
# You can define your custom sample loader returning the list of samples.
# Or define your custom formatter and pass it to the `load_tts_samples`.
# Check `TTS.tts.datasets.load_tts_samples` for more details.
from TTS.tts.datasets import load_tts_samples
# custom formatter implementation
def formatter(root_path, manifest_file, **kwargs): # pylint: disable=unused-argument
"""Assumes each line as ```<filename>|<transcription>```
"""
txt_file = os.path.join(root_path, manifest_file)
items = []
speaker_name = "my_speaker"
with open(txt_file, "r", encoding="utf-8") as ttf:
for line in ttf:
cols = line.split("|")
#wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
wav_file = "/media/DATA-2/TTS/coqui/LJSpeech-1.1/wavs/" + cols[0] + ".wav"
text = cols[1]
items.append({"text":text, "audio_file":wav_file, "speaker_name":speaker_name})
return items
# load training samples
train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, formatter=formatter)
# INITIALIZE THE MODEL
# Models take a config object and a speaker manager as input
# Config defines the details of the model like the number of layers, the size of the embedding, etc.
# Speaker manager is used by multi-speaker models.
print("================== train_samples ========================")
print("len data : ", len(train_samples))
print(train_samples)
print("======================== eval_samples ================")
print("len data : ", len(eval_samples))
print(eval_samples)
model = Tacotron2(config, ap, tokenizer, speaker_manager=None)
# INITIALIZE THE TRAINER
# Trainer provides a generic API to train all the
I'm studying TTS and when doing training, the training stops with the information Segmentation fault (core dumped)
import os
# Trainer: Where the ✨️ happens.
# TrainingArgs: Defines the set of arguments of the Trainer.
from trainer import Trainer, TrainerArgs
# GlowTTSConfig: all model related values for training, validating and testing.
from TTS.tts.configs.tacotron2_config import Tacotron2Config
# BaseDatasetConfig: defines name, formatter and path of the dataset.
from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.tacotron2 import Tacotron2
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor
from TTS.config.shared_configs import BaseAudioConfig
# we use the same path as this script as our training folder.
output_path = os.path.dirname(os.path.abspath(__file__))
# DEFINE DATASET CONFIG
# Set LJSpeech as our target dataset and define its path.
# You can also use a simple Dict to define the dataset and pass it to your custom formatter.
dataset_config = BaseDatasetConfig(
name="ljspeech", meta_file_train="metadata.csv", path=os.path.join(output_path, "../LJSpeech-1.1/")
)
# INITIALIZE THE TRAINING CONFIGURATION
# Configure the model. Every config class inherits the BaseTTSConfig.
audio_config = BaseAudioConfig(
sample_rate=22050,
do_trim_silence=True,
trim_db=60.0,
signal_norm=False,
mel_fmin=0.0,
mel_fmax=8000,
spec_gain=1.0,
log_func="np.log",
ref_level_db=20,
preemphasis=0.0,
)
config = Tacotron2Config( # This is the config that is saved for the future use
audio=audio_config,
batch_size=4,
eval_batch_size=4,
num_loader_workers=2,
num_eval_loader_workers=2,
run_eval=True,
test_delay_epochs=-1,
r=6,
gradual_training=[[0, 6, 4], [10000, 4, 4], [50000, 3, 4], [100000, 2, 4]],
double_decoder_consistency=True,
epochs=1000,
text_cleaner="phoneme_cleaners",
use_phonemes=True,
phoneme_language="en-us",
phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
print_step=10,
print_eval=True,
mixed_precision=False,
output_path=output_path,
datasets=[dataset_config],
)
# INITIALIZE THE AUDIO PROCESSOR
# Audio processor is used for feature extraction and audio I/O.
# It mainly serves to the dataloader and the training loggers.
ap = AudioProcessor.init_from_config(config)
# INITIALIZE THE TOKENIZER
# Tokenizer is used to convert text to sequences of token IDs.
# If characters are not defined in the config, default characters are passed to the config
tokenizer, config = TTSTokenizer.init_from_config(config)
# LOAD DATA SAMPLES
# Each sample is a list of ```[text, audio_file_path, speaker_name]```
# You can define your custom sample loader returning the list of samples.
# Or define your custom formatter and pass it to the `load_tts_samples`.
# Check `TTS.tts.datasets.load_tts_samples` for more details.
from TTS.tts.datasets import load_tts_samples
# custom formatter implementation
def formatter(root_path, manifest_file, **kwargs): # pylint: disable=unused-argument
"""Assumes each line as ```<filename>|<transcription>```
"""
txt_file = os.path.join(root_path, manifest_file)
items = []
speaker_name = "my_speaker"
with open(txt_file, "r", encoding="utf-8") as ttf:
for line in ttf:
cols = line.split("|")
#wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
wav_file = "/media/DATA-2/TTS/coqui/LJSpeech-1.1/wavs/" + cols[0] + ".wav"
text = cols[1]
items.append({"text":text, "audio_file":wav_file, "speaker_name":speaker_name})
return items
# load training samples
train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, formatter=formatter)
# INITIALIZE THE MODEL
# Models take a config object and a speaker manager as input
# Config defines the details of the model like the number of layers, the size of the embedding, etc.
# Speaker manager is used by multi-speaker models.
print("================== train_samples ========================")
print("len data : ", len(train_samples))
print(train_samples)
print("======================== eval_samples ================")
print("len data : ", len(eval_samples))
print(eval_samples)
model = Tacotron2(config, ap, tokenizer, speaker_manager=None)
# INITIALIZE THE TRAINER
# Trainer provides a generic API to train all the ????TTS models with all its perks like mixed-precision training,
# distributed training, etc.
trainer = Trainer(
TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples
)
# AND... 3,2,1... ????
trainer.fit()
training runs until step 647/2162 and stops like this:
--> STEP: 647/2162 -- GLOBAL_STEP: 647
| > decoder_loss: 35.02273 (33.81891)
| > postnet_loss: 37.02569 (35.82565)
| > stopnet_loss: 0.82287 (0.85986)
| > decoder_coarse_loss: 35.01795 (33.80500)
| > decoder_ddc_loss: 0.00264 (0.00408)
| > ga_loss: 0.00451 (0.00664)
| > decoder_diff_spec_loss: 0.42732 (0.43585)
| > postnet_diff_spec_loss: 4.44786 (4.47058)
| > decoder_ssim_loss: 0.99999 (0.99978)
| > postnet_ssim_loss: 0.99983 (0.99947)
| > loss: 29.33145 (28.48289)
| > align_error: 0.98594 (0.97906)
| > grad_norm: 3.32803 (3.73880)
| > current_lr: 0.00000
| > step_time: 0.59430 (0.45785)
| > loader_time: 0.00150 (0.00150)
Segmentation fault (core dumped)
there was an error "CUBLAS_STATUS_EXECUTION_FAILED before" so I checked the pytorch version and I'm using 1.11.0 version
and I reduced the batch size because it was previously out of memory
what sould i do?
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论