drewThomasson committed (verified)
Commit b4a108c · Parent(s): bc87708

Update app.py

Files changed (1): app.py (+25, -12)
app.py CHANGED
@@ -437,7 +437,6 @@ import torch
 from TTS.api import TTS
 from nltk.tokenize import sent_tokenize
 from pydub import AudioSegment
-# Assuming split_long_sentence and wipe_folder are defined elsewhere in your code
 
 default_target_voice_path = "default_voice.wav"  # Ensure this is a valid path
 default_language_code = "en"
@@ -483,18 +482,30 @@ def combine_wav_files(input_directory, output_directory, file_name):
     print(f"Combined audio saved to {output_file_path}")
 
 # Function to split long strings into parts
-def split_long_sentence(sentence, max_length=249, max_pauses=10):
+# Modify the function to handle special cases for Chinese, Italian, and default for others
+def split_long_sentence(sentence, language='en', max_pauses=10):
     """
     Splits a sentence into parts based on length or number of pauses without recursion.
 
     :param sentence: The sentence to split.
-    :param max_length: Maximum allowed length of a sentence.
+    :param language: The language of the sentence (default is English).
     :param max_pauses: Maximum allowed number of pauses in a sentence.
     :return: A list of sentence parts that meet the criteria.
     """
+    # Adjust the max_length and punctuation symbols based on language
+    if language == 'zh-cn':
+        max_length = 82  # Chinese-specific max length
+        punctuation = [',', '。', ';', '!', '?']  # Chinese-specific punctuation
+    elif language == 'it':
+        max_length = 213  # Italian-specific max length
+        punctuation = [',', ';', '.']  # Standard punctuation
+    else:
+        max_length = 249  # Default max length for other languages
+        punctuation = [',', ';', '.']  # Default punctuation
+
     parts = []
-    while len(sentence) > max_length or sentence.count(',') + sentence.count(';') + sentence.count('.') > max_pauses:
-        possible_splits = [i for i, char in enumerate(sentence) if char in ',;.' and i < max_length]
+    while len(sentence) > max_length or sum(sentence.count(p) for p in punctuation) > max_pauses:
+        possible_splits = [i for i, char in enumerate(sentence) if char in punctuation and i < max_length]
         if possible_splits:
             # Find the best place to split the sentence, preferring the last possible split to keep parts longer
             split_at = possible_splits[-1] + 1
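Taken on its own, the reworked splitter can be exercised as below. This is a minimal, self-contained sketch: the hard split used when no punctuation falls before max_length, and the way parts are accumulated, are assumptions, since that part of the loop lies outside this hunk.

# Sketch of the updated splitter; the loop tail is an assumed reconstruction.
def split_long_sentence(sentence, language='en', max_pauses=10):
    if language == 'zh-cn':
        max_length = 82
        punctuation = [',', '。', ';', '!', '?']
    elif language == 'it':
        max_length = 213
        punctuation = [',', ';', '.']
    else:
        max_length = 249
        punctuation = [',', ';', '.']

    parts = []
    while len(sentence) > max_length or sum(sentence.count(p) for p in punctuation) > max_pauses:
        possible_splits = [i for i, char in enumerate(sentence) if char in punctuation and i < max_length]
        if possible_splits:
            # Prefer the last possible split to keep parts longer
            split_at = possible_splits[-1] + 1
        else:
            split_at = max_length  # assumed fallback: hard split at the length limit
        parts.append(sentence[:split_at].strip())
        sentence = sentence[split_at:].strip()
    parts.append(sentence)
    return parts

# Illustrative call with a synthetic, comma-heavy sentence:
for part in split_long_sentence("one, " * 80, language='en'):
    print(len(part), part[:30])

Every returned part stays at or under the language-specific limit (249 characters here); the 82/213/249 values appear to mirror XTTS v2's per-language character limits, which is presumably why they are hard-coded per language.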
@@ -559,7 +570,7 @@ def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, targe
                 chapter_text = file.read()
             sentences = sent_tokenize(chapter_text, language='italian' if language == 'it' else 'english')
             for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
-                fragments = split_long_sentence(sentence, max_length=249 if language == "en" else 213, max_pauses=10)
+                fragments = split_long_sentence(sentence, language=language)
                 for fragment in fragments:
                     if fragment != "":
                         print(f"Generating fragment: {fragment}...")
@@ -579,7 +590,7 @@ def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, targe
 
 
 
-def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, target_voice_path=None, language=None):
+def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, target_voice_path=None, language="en"):
     selected_tts_model = "tts_models/multilingual/multi-dataset/xtts_v2"
     tts = TTS(selected_tts_model, progress_bar=False).to(device)
 
@@ -606,14 +617,13 @@ def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, tar
                 chapter_text = file.read()
             sentences = sent_tokenize(chapter_text, language='italian' if language == 'it' else 'english')
             for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
-                fragments = split_long_sentence(sentence, max_length=249 if language == "en" else 213, max_pauses=10)
+                fragments = split_long_sentence(sentence, language=language)
                 for fragment in fragments:
                     if fragment != "":
                         print(f"Generating fragment: {fragment}...")
                         fragment_file_path = os.path.join(temp_audio_directory, f"{temp_count}.wav")
                         speaker_wav_path = target_voice_path if target_voice_path else default_target_voice_path
-                        language_code = language if language else default_language_code
-                        tts.tts_to_file(text=fragment, file_path=fragment_file_path, speaker_wav=speaker_wav_path, language=language_code)
+                        tts.tts_to_file(text=fragment, file_path=fragment_file_path, speaker_wav=speaker_wav_path, language=language)
                         temp_count += 1
 
     combine_wav_files(temp_audio_directory, output_audio_dir, output_file_name)
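The synthesis call above follows the standard Coqui TTS API. A minimal standalone sketch of generating a single fragment with XTTS v2 (the reference-voice and output paths are placeholders, not files introduced by this commit):

import torch
from TTS.api import TTS

device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False).to(device)

# speaker_wav is a short reference recording used for voice cloning;
# language must be one of the codes XTTS v2 supports (e.g. "en", "it", "zh-cn").
tts.tts_to_file(
    text="Hello, this is a test fragment.",
    speaker_wav="default_voice.wav",
    language="en",
    file_path="fragment_0.wav",
)

Passing the UI/CLI language value straight through to tts_to_file, as the new line does, relies on those values matching the codes XTTS expects ("en", "it", and "zh-cn" all do).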
@@ -636,8 +646,11 @@ def convert_ebook_to_audio(ebook_file, target_voice_file, language, use_custom_m
     remove_folder_with_contents(full_folder_working_files)
     remove_folder_with_contents(output_audio_directory)
 
-    # If the language argument is set use it instead
-    language = args.language if args.language else language
+    # If running in headless mode, use the language from args
+    if args.headless and args.language:
+        language = args.language
+    else:
+        language = language  # Gradio dropdown value
 
     # If headless is used with the custom model arguments
     if args.use_custom_model and args.custom_model and args.custom_config and args.custom_vocab:
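The new branch assumes the script's argument parser exposes --headless and --language flags. Their definitions are not part of this diff; a hypothetical sketch of the kind of setup the hunk relies on:

import argparse

# Hypothetical parser setup; the real flag names and defaults live elsewhere in app.py
# and may differ. Only args.headless and args.language are assumed by the hunk above.
parser = argparse.ArgumentParser(description="Convert an ebook to an audiobook")
parser.add_argument("--headless", action="store_true", help="Run without launching the Gradio UI")
parser.add_argument("--language", default=None, help="TTS language code, e.g. en, it, zh-cn")
args = parser.parse_args()

With a parser like that, a headless run (for example, python app.py --headless --language zh-cn) overrides the Gradio dropdown value, while a UI run keeps whatever language the dropdown supplied.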
 