Update app.py
app.py
CHANGED
@@ -437,7 +437,6 @@ import torch
 from TTS.api import TTS
 from nltk.tokenize import sent_tokenize
 from pydub import AudioSegment
-# Assuming split_long_sentence and wipe_folder are defined elsewhere in your code

 default_target_voice_path = "default_voice.wav"  # Ensure this is a valid path
 default_language_code = "en"
@@ -483,18 +482,30 @@ def combine_wav_files(input_directory, output_directory, file_name):
     print(f"Combined audio saved to {output_file_path}")

 # Function to split long strings into parts
-
+# Modify the function to handle special cases for Chinese, Italian, and default for others
+def split_long_sentence(sentence, language='en', max_pauses=10):
     """
     Splits a sentence into parts based on length or number of pauses without recursion.

     :param sentence: The sentence to split.
-    :param
+    :param language: The language of the sentence (default is English).
     :param max_pauses: Maximum allowed number of pauses in a sentence.
     :return: A list of sentence parts that meet the criteria.
     """
+    # Adjust the max_length and punctuation symbols based on language
+    if language == 'zh-cn':
+        max_length = 82  # Chinese-specific max length
+        punctuation = [',', '。', ';', '!', '?']  # Chinese-specific punctuation
+    elif language == 'it':
+        max_length = 213  # Italian-specific max length
+        punctuation = [',', ';', '.']  # Standard punctuation
+    else:
+        max_length = 249  # Default max length for other languages
+        punctuation = [',', ';', '.']  # Default punctuation
+
     parts = []
-    while len(sentence) > max_length or sentence.count(
-        possible_splits = [i for i, char in enumerate(sentence) if char in
+    while len(sentence) > max_length or sum(sentence.count(p) for p in punctuation) > max_pauses:
+        possible_splits = [i for i, char in enumerate(sentence) if char in punctuation and i < max_length]
         if possible_splits:
             # Find the best place to split the sentence, preferring the last possible split to keep parts longer
             split_at = possible_splits[-1] + 1
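The new language branches can be spot-checked in isolation. A minimal sketch, assuming split_long_sentence is in scope exactly as defined above (paste the function into a Python shell, or import it from app.py if importing the module has no unwanted side effects); the sample sentence is made up:

    # 15 clauses -> 14 commas (> max_pauses=10) and roughly 550 characters (> the default max_length of 249),
    # so the while-loop above has to break the sentence up.
    sample = ", ".join(["a clause that pads the sentence out"] * 15) + "."
    parts = split_long_sentence(sample, language='en')   # default branch: max_length 249, standard punctuation
    print(len(parts), [len(p) for p in parts])           # each part should come back at or below 249 characters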
@@ -559,7 +570,7 @@ def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, targe
             chapter_text = file.read()
             sentences = sent_tokenize(chapter_text, language='italian' if language == 'it' else 'english')
             for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
-                fragments = split_long_sentence(sentence,
+                fragments = split_long_sentence(sentence, language=language)
                 for fragment in fragments:
                     if fragment != "":
                         print(f"Generating fragment: {fragment}...")
@@ -579,7 +590,7 @@ def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, targe



-def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, target_voice_path=None, language=
+def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, target_voice_path=None, language="en"):
     selected_tts_model = "tts_models/multilingual/multi-dataset/xtts_v2"
     tts = TTS(selected_tts_model, progress_bar=False).to(device)

@@ -606,14 +617,13 @@ def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, tar
             chapter_text = file.read()
             sentences = sent_tokenize(chapter_text, language='italian' if language == 'it' else 'english')
             for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
-                fragments = split_long_sentence(sentence,
+                fragments = split_long_sentence(sentence, language=language)
                 for fragment in fragments:
                     if fragment != "":
                         print(f"Generating fragment: {fragment}...")
                         fragment_file_path = os.path.join(temp_audio_directory, f"{temp_count}.wav")
                         speaker_wav_path = target_voice_path if target_voice_path else default_target_voice_path
-
-                        tts.tts_to_file(text=fragment, file_path=fragment_file_path, speaker_wav=speaker_wav_path, language=language_code)
+                        tts.tts_to_file(text=fragment, file_path=fragment_file_path, speaker_wav=speaker_wav_path, language=language)
                         temp_count += 1

     combine_wav_files(temp_audio_directory, output_audio_dir, output_file_name)
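Both conversion paths now forward the selected language into split_long_sentence, and the standard-model path also passes it straight to tts.tts_to_file (previously that call used a separate language_code value). Since a bad code would otherwise only fail deep inside the chapter loop, a caller could add an early guard along these lines; this is a sketch, not part of the commit, and the set below reflects the languages XTTS v2 is generally documented to support (the loaded model's own language list is the authoritative source):

    # Hypothetical fail-fast check before the convert_chapters_to_audio_* call.
    xtts_v2_codes = {"en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru",
                     "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko"}
    if language not in xtts_v2_codes:
        raise ValueError(f"Unsupported XTTS v2 language code: {language!r}")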
@@ -636,8 +646,11 @@ def convert_ebook_to_audio(ebook_file, target_voice_file, language, use_custom_m
     remove_folder_with_contents(full_folder_working_files)
     remove_folder_with_contents(output_audio_directory)

-    # If
-
+    # If running in headless mode, use the language from args
+    if args.headless and args.language:
+        language = args.language
+    else:
+        language = language  # Gradio dropdown value

     # If headless is used with the custom model arguments
     if args.use_custom_model and args.custom_model and args.custom_config and args.custom_vocab:
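The headless branch above relies on args.headless and args.language (and the custom-model branch on args.use_custom_model, args.custom_model, args.custom_config, and args.custom_vocab), all of which must be defined on the argument parser elsewhere in app.py. For reference, a sketch of the argparse wiring those attribute names imply; only the attribute names come from the diff, while the types, defaults, and help strings here are illustrative:

    import argparse

    parser = argparse.ArgumentParser(description="ebook to audiobook converter")
    parser.add_argument("--headless", action="store_true", help="run without the Gradio UI")
    parser.add_argument("--language", default="en", help="language code, e.g. en, it, zh-cn")
    parser.add_argument("--use_custom_model", action="store_true", help="use a custom XTTS checkpoint")
    parser.add_argument("--custom_model", help="path to the custom model file")
    parser.add_argument("--custom_config", help="path to the custom model config")
    parser.add_argument("--custom_vocab", help="path to the custom model vocab")
    args = parser.parse_args()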