Update app.py
app.py
CHANGED
@@ -437,7 +437,6 @@ import torch
 from TTS.api import TTS
 from nltk.tokenize import sent_tokenize
 from pydub import AudioSegment
-# Assuming split_long_sentence and wipe_folder are defined elsewhere in your code

 default_target_voice_path = "default_voice.wav"  # Ensure this is a valid path
 default_language_code = "en"
@@ -483,18 +482,30 @@ def combine_wav_files(input_directory, output_directory, file_name):
     print(f"Combined audio saved to {output_file_path}")

 # Function to split long strings into parts
-
+# Modify the function to handle special cases for Chinese, Italian, and default for others
+def split_long_sentence(sentence, language='en', max_pauses=10):
     """
     Splits a sentence into parts based on length or number of pauses without recursion.

     :param sentence: The sentence to split.
-    :param
+    :param language: The language of the sentence (default is English).
     :param max_pauses: Maximum allowed number of pauses in a sentence.
     :return: A list of sentence parts that meet the criteria.
     """
+    # Adjust the max_length and punctuation symbols based on language
+    if language == 'zh-cn':
+        max_length = 82  # Chinese-specific max length
+        punctuation = [',', '。', ';', '!', '?']  # Chinese-specific punctuation
+    elif language == 'it':
+        max_length = 213  # Italian-specific max length
+        punctuation = [',', ';', '.']  # Standard punctuation
+    else:
+        max_length = 249  # Default max length for other languages
+        punctuation = [',', ';', '.']  # Default punctuation
+
     parts = []
-    while len(sentence) > max_length or sentence.count(
-        possible_splits = [i for i, char in enumerate(sentence) if char in
+    while len(sentence) > max_length or sum(sentence.count(p) for p in punctuation) > max_pauses:
+        possible_splits = [i for i, char in enumerate(sentence) if char in punctuation and i < max_length]
         if possible_splits:
             # Find the best place to split the sentence, preferring the last possible split to keep parts longer
             split_at = possible_splits[-1] + 1
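The new language branches can be spot-checked in isolation. A minimal sketch, assuming split_long_sentence is in scope exactly as defined above (paste the function into a Python shell, or import it from app.py if importing the module has no unwanted side effects); the sample sentence is made up:

    # 15 clauses -> 14 commas (> max_pauses=10) and roughly 550 characters (> the default max_length of 249),
    # so the while-loop above has to break the sentence up.
    sample = ", ".join(["a clause that pads the sentence out"] * 15) + "."
    parts = split_long_sentence(sample, language='en')   # default branch: max_length 249, standard punctuation
    print(len(parts), [len(p) for p in parts])           # each part should come back at or below 249 characters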
@@ -559,7 +570,7 @@ def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, targe
             chapter_text = file.read()
             sentences = sent_tokenize(chapter_text, language='italian' if language == 'it' else 'english')
             for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
-                fragments = split_long_sentence(sentence,
+                fragments = split_long_sentence(sentence, language=language)
                 for fragment in fragments:
                     if fragment != "":
                         print(f"Generating fragment: {fragment}...")
@@ -579,7 +590,7 @@ def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, targe



-def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, target_voice_path=None, language=
+def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, target_voice_path=None, language="en"):
     selected_tts_model = "tts_models/multilingual/multi-dataset/xtts_v2"
     tts = TTS(selected_tts_model, progress_bar=False).to(device)

@@ -606,14 +617,13 @@ def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, tar
             chapter_text = file.read()
             sentences = sent_tokenize(chapter_text, language='italian' if language == 'it' else 'english')
             for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
-                fragments = split_long_sentence(sentence,
+                fragments = split_long_sentence(sentence, language=language)
                 for fragment in fragments:
                     if fragment != "":
                         print(f"Generating fragment: {fragment}...")
                         fragment_file_path = os.path.join(temp_audio_directory, f"{temp_count}.wav")
                         speaker_wav_path = target_voice_path if target_voice_path else default_target_voice_path
-
-                        tts.tts_to_file(text=fragment, file_path=fragment_file_path, speaker_wav=speaker_wav_path, language=language_code)
+                        tts.tts_to_file(text=fragment, file_path=fragment_file_path, speaker_wav=speaker_wav_path, language=language)
                         temp_count += 1

     combine_wav_files(temp_audio_directory, output_audio_dir, output_file_name)
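Both conversion paths now forward the selected language into split_long_sentence, and the standard-model path also passes it straight to tts.tts_to_file (previously that call used a separate language_code value). Since a bad code would otherwise only fail deep inside the chapter loop, a caller could add an early guard along these lines; this is a sketch, not part of the commit, and the set below reflects the languages XTTS v2 is generally documented to support (the loaded model's own language list is the authoritative source):

    # Hypothetical fail-fast check before the convert_chapters_to_audio_* call.
    xtts_v2_codes = {"en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru",
                     "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko"}
    if language not in xtts_v2_codes:
        raise ValueError(f"Unsupported XTTS v2 language code: {language!r}")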
@@ -636,8 +646,11 @@ def convert_ebook_to_audio(ebook_file, target_voice_file, language, use_custom_m
     remove_folder_with_contents(full_folder_working_files)
     remove_folder_with_contents(output_audio_directory)

-    # If
-
+    # If running in headless mode, use the language from args
+    if args.headless and args.language:
+        language = args.language
+    else:
+        language = language  # Gradio dropdown value

     # If headless is used with the custom model arguments
     if args.use_custom_model and args.custom_model and args.custom_config and args.custom_vocab:
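The headless branch above relies on args.headless and args.language (and the custom-model branch on args.use_custom_model, args.custom_model, args.custom_config, and args.custom_vocab), all of which must be defined on the argument parser elsewhere in app.py. For reference, a sketch of the argparse wiring those attribute names imply; only the attribute names come from the diff, while the types, defaults, and help strings here are illustrative:

    import argparse

    parser = argparse.ArgumentParser(description="ebook to audiobook converter")
    parser.add_argument("--headless", action="store_true", help="run without the Gradio UI")
    parser.add_argument("--language", default="en", help="language code, e.g. en, it, zh-cn")
    parser.add_argument("--use_custom_model", action="store_true", help="use a custom XTTS checkpoint")
    parser.add_argument("--custom_model", help="path to the custom model file")
    parser.add_argument("--custom_config", help="path to the custom model config")
    parser.add_argument("--custom_vocab", help="path to the custom model vocab")
    args = parser.parse_args()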