Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions TTS/tts/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from TTS.utils.audio import AudioProcessor
from TTS.utils.audio.numpy_transforms import compute_energy as calculate_energy

import mutagen

# to prevent too many open files error as suggested here
# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
torch.multiprocessing.set_sharing_strategy("file_system")
Expand Down Expand Up @@ -42,6 +44,15 @@ def string2filename(string):
return filename


def get_audio_size(audiopath):
    """Return the length of an audio file in samples, read from its metadata.

    The value is computed as ``duration_in_seconds * sample_rate`` using the
    stream info reported by ``mutagen``, so the audio itself is not decoded.

    Args:
        audiopath: Path to an ``mp3``, ``wav`` or ``flac`` file. The format is
            decided from the file extension (case-insensitive).

    Returns:
        int: Number of samples, truncated towards zero.

    Raises:
        RuntimeError: If the extension is not one of the supported formats, or
            if ``mutagen`` cannot identify the file type.
    """
    # splitext only inspects the final path component, so a dot inside a
    # directory name (e.g. "data/v1.0/clip") is not mistaken for an extension.
    extension = os.path.splitext(audiopath)[1].lstrip(".").lower()
    if extension not in {"mp3", "wav", "flac"}:
        raise RuntimeError(
            f"The audio format {extension} is not supported, please convert the audio files to mp3, flac, or wav format!"
        )

    # mutagen.File returns None when it cannot determine the file type; fail
    # with an explicit message instead of an AttributeError on `.info`.
    audio_file = mutagen.File(audiopath)
    if audio_file is None:
        raise RuntimeError(f"Could not read audio metadata from {audiopath}, the file may be corrupted or mislabeled.")

    audio_info = audio_file.info
    return int(audio_info.length * audio_info.sample_rate)


class TTSDataset(Dataset):
def __init__(
self,
Expand Down Expand Up @@ -176,7 +187,7 @@ def lengths(self):
lens = []
for item in self.samples:
_, wav_file, *_ = _parse_sample(item)
audio_len = os.path.getsize(wav_file) / 16 * 8 # assuming 16bit audio
audio_len = get_audio_size(wav_file)
lens.append(audio_len)
return lens

Expand Down Expand Up @@ -295,7 +306,7 @@ def load_data(self, idx):
def _compute_lengths(samples):
new_samples = []
for item in samples:
audio_length = os.path.getsize(item["audio_file"]) / 16 * 8 # assuming 16bit audio
audio_length = get_audio_size(item["audio_file"])
text_lenght = len(item["text"])
item["audio_length"] = audio_length
item["text_length"] = text_lenght
Expand Down
6 changes: 4 additions & 2 deletions TTS/tts/models/xtts.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,11 +756,13 @@ def load_checkpoint(

model_path = checkpoint_path or os.path.join(checkpoint_dir, "model.pth")
vocab_path = vocab_path or os.path.join(checkpoint_dir, "vocab.json")
speaker_file_path = speaker_file_path or os.path.join(checkpoint_dir, "speakers_xtts.pth")

if speaker_file_path is None and checkpoint_dir is not None:
speaker_file_path = os.path.join(checkpoint_dir, "speakers_xtts.pth")

self.language_manager = LanguageManager(config)
self.speaker_manager = None
if os.path.exists(speaker_file_path):
if speaker_file_path is not None and os.path.exists(speaker_file_path):
self.speaker_manager = SpeakerManager(speaker_file_path)

if os.path.exists(vocab_path):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pyyaml>=6.0
fsspec>=2023.6.0 # <= 2023.9.1 makes aux tests fail
aiohttp>=3.8.1
packaging>=23.1
mutagen==1.47.0
# deps for examples
flask>=2.0.1
# deps for inference
Expand Down
9 changes: 9 additions & 0 deletions tests/data/ljspeech/metadata_flac.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
audio_file|text|transcription|speaker_name
wavs/LJ001-0001.flac|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|ljspeech-0
wavs/LJ001-0002.flac|in being comparatively modern.|in being comparatively modern.|ljspeech-0
wavs/LJ001-0003.flac|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|ljspeech-1
wavs/LJ001-0004.flac|produced the block books, which were the immediate predecessors of the true printed book,|produced the block books, which were the immediate predecessors of the true printed book,|ljspeech-1
wavs/LJ001-0005.flac|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
wavs/LJ001-0006.flac|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
wavs/LJ001-0007.flac|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
wavs/LJ001-0008.flac|has never been surpassed.|has never been surpassed.|ljspeech-3
9 changes: 9 additions & 0 deletions tests/data/ljspeech/metadata_mp3.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
audio_file|text|transcription|speaker_name
wavs/LJ001-0001.mp3|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|ljspeech-0
wavs/LJ001-0002.mp3|in being comparatively modern.|in being comparatively modern.|ljspeech-0
wavs/LJ001-0003.mp3|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|ljspeech-1
wavs/LJ001-0004.mp3|produced the block books, which were the immediate predecessors of the true printed book,|produced the block books, which were the immediate predecessors of the true printed book,|ljspeech-1
wavs/LJ001-0005.mp3|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
wavs/LJ001-0006.mp3|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
wavs/LJ001-0007.mp3|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
wavs/LJ001-0008.mp3|has never been surpassed.|has never been surpassed.|ljspeech-3
9 changes: 9 additions & 0 deletions tests/data/ljspeech/metadata_wav.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
audio_file|text|transcription|speaker_name
wavs/LJ001-0001.wav|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|ljspeech-0
wavs/LJ001-0002.wav|in being comparatively modern.|in being comparatively modern.|ljspeech-0
wavs/LJ001-0003.wav|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|ljspeech-1
wavs/LJ001-0004.wav|produced the block books, which were the immediate predecessors of the true printed book,|produced the block books, which were the immediate predecessors of the true printed book,|ljspeech-1
wavs/LJ001-0005.wav|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
wavs/LJ001-0006.wav|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
wavs/LJ001-0007.wav|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
wavs/LJ001-0008.wav|has never been surpassed.|has never been surpassed.|ljspeech-3
Binary file added tests/data/ljspeech/wavs/LJ001-0001.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0001.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0002.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0002.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0003.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0003.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0004.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0004.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0005.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0005.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0006.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0006.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0007.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0007.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0008.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0008.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0009.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0009.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0010.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0010.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0011.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0011.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0012.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0012.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0013.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0013.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0014.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0014.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0015.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0015.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0016.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0016.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0017.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0017.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0018.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0018.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0019.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0019.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0020.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0020.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0021.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0021.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0022.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0022.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0023.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0023.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0024.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0024.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0025.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0025.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0026.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0026.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0027.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0027.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0028.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0028.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0029.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0029.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0030.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0030.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0031.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0031.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0032.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0032.mp3
Binary file not shown.
Loading