Spaces:

NeuralFalcon
/

Kokoro-TTS

Running

App Files Community

NeuralFalcon committed 3 days ago

Commit

de85bf8

verified ·

1 Parent(s): 4502dba

Update KOKORO/utils.py

Browse files

Files changed (1) hide show

KOKORO/utils.py +9 -6

KOKORO/utils.py CHANGED Viewed

@@ -226,7 +226,7 @@ def parse_speechtypes_text(gen_text):
     return segments
-def podcast(MODEL, device, gen_text, speed=1.0, trim=0, pad_between_segments=0, remove_silence=True, minimum_silence=50):
     segments = parse_speechtypes_text(gen_text)
     speed = clamp_speed(speed)
     trim = clamp_trim(trim)
@@ -276,16 +276,20 @@ def podcast(MODEL, device, gen_text, speed=1.0, trim=0, pad_between_segments=0,
     return output_file
 def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments=0.5, output_file="",remove_silence=True,minimum_silence=50):
-    text=clean_text(text)
-    segments = large_text(text, voice_name)
     voice_pack_path = f"./KOKORO/voices/{voice_name}.pt"
     VOICEPACK = torch.load(voice_pack_path, weights_only=True).to(device)
     speed = clamp_speed(speed)
     trim = clamp_trim(trim)
     silence_duration = clamp_trim(pad_between_segments)
     output_file=get_random_file_name(output_file)
     if debug:
-        print(f'Loaded voice: {voice_name}')
         print(f"Speed: {speed}")
         print(f"Trim: {trim}")
         print(f"Silence duration: {silence_duration}")
@@ -305,7 +309,7 @@ def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments
             text = i[1]
             if debug:
                 print(i)
-            audio, out_ps = generate(MODEL, text, VOICEPACK, lang=voice_name[0], speed=speed)
             audio = trim_if_needed(audio, trim)
             # Scale audio from float32 to int16
@@ -339,4 +343,3 @@ def tts_file_name(text):
     # Construct the file name
     file_name = f"{temp_folder}/{truncated_text}_{random_string}.wav"
     return file_name

     return segments
+def podcast(MODEL, device, gen_text, speed=1.0, trim=0.5, pad_between_segments=0, remove_silence=True, minimum_silence=50):
     segments = parse_speechtypes_text(gen_text)
     speed = clamp_speed(speed)
     trim = clamp_trim(trim)
     return output_file
 def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments=0.5, output_file="",remove_silence=True,minimum_silence=50):
+    language = voice_name[0]
     voice_pack_path = f"./KOKORO/voices/{voice_name}.pt"
+    if voice_name.endswith(".pt"):
+        language="a"
+        voice_pack_path=voice_name
+    text=clean_text(text)
+    segments = large_text(text, language)
     VOICEPACK = torch.load(voice_pack_path, weights_only=True).to(device)
     speed = clamp_speed(speed)
     trim = clamp_trim(trim)
     silence_duration = clamp_trim(pad_between_segments)
     output_file=get_random_file_name(output_file)
     if debug:
+        print(f'Loaded voice: {voice_pack_path}')
         print(f"Speed: {speed}")
         print(f"Trim: {trim}")
         print(f"Silence duration: {silence_duration}")
             text = i[1]
             if debug:
                 print(i)
+            audio, out_ps = generate(MODEL, text, VOICEPACK, lang=language, speed=speed)
             audio = trim_if_needed(audio, trim)
             # Scale audio from float32 to int16
     # Construct the file name
     file_name = f"{temp_folder}/{truncated_text}_{random_string}.wav"
     return file_name