rockdrigoma committed on
Commit
a7161ed
·
verified ·
1 Parent(s): 3788025

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -23,9 +23,11 @@ os.environ["COQUI_TOS_AGREED"] = "1"
23
  device = "cuda"
24
 
25
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_bill_spa").to(device)
26
- print(tts.synthesizer.tts_checkpoint)
 
 
 
27
 
28
- """
29
  def clear_gpu_cache():
30
  # clear the GPU cache
31
  if torch.cuda.is_available():
@@ -46,7 +48,6 @@ def load_model(xtts_checkpoint, xtts_config, xtts_vocab):
46
  XTTS_MODEL.cuda()
47
 
48
  print("Model Loaded!")
49
- return "Model Loaded!"
50
 
51
  def run_tts(lang='es', tts_text, speaker_audio_file):
52
  if XTTS_MODEL is None or not speaker_audio_file:
@@ -69,14 +70,16 @@ def run_tts(lang='es', tts_text, speaker_audio_file):
69
  out["wav"] = torch.tensor(out["wav"]).unsqueeze(0)
70
  out_path = fp.name
71
  torchaudio.save(out_path, out["wav"], 24000)
 
 
 
72
 
73
- return "Speech generated !", out_path, speaker_audio_file
74
- """
75
 
76
  @spaces.GPU(enable_queue=True)
77
  def generate(text, audio):
78
- tts.tts_to_file(text=text, speaker_wav=audio, language="es", file_path="./output.wav")
79
- return "./output.wav"
 
80
 
81
  demo = gr.Interface(
82
  fn=generate,
 
23
device = "cuda"  # XTTS inference in this app always runs on the GPU

# Load the fine-tuned multilingual XTTS model once at start-up and move it
# to the CUDA device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_bill_spa").to(device)

# Coqui TTS caches downloaded checkpoints under ~/.local/share/tts.
# Derive the cache directory from the current user's home instead of
# hard-coding "/home/user", so the app also runs outside the original
# Hugging Face Space container (inside it HOME is /home/user, so the
# resulting paths are identical).
_TTS_CACHE_DIR = os.path.join(
    os.path.expanduser("~"),
    ".local",
    "share",
    "tts",
    "tts_models--multilingual--multi-dataset--xtts_bill_spa",
)
model_path = os.path.join(_TTS_CACHE_DIR, "model.pth")
config_path = os.path.join(_TTS_CACHE_DIR, "config.json")
vocab_path = os.path.join(_TTS_CACHE_DIR, "vocab.json")
30
 
 
31
  def clear_gpu_cache():
32
  # clear the GPU cache
33
  if torch.cuda.is_available():
 
48
  XTTS_MODEL.cuda()
49
 
50
  print("Model Loaded!")
 
51
 
52
  def run_tts(lang='es', tts_text, speaker_audio_file):
53
  if XTTS_MODEL is None or not speaker_audio_file:
 
70
  out["wav"] = torch.tensor(out["wav"]).unsqueeze(0)
71
  out_path = fp.name
72
  torchaudio.save(out_path, out["wav"], 24000)
73
+ print("Speech generated !")
74
+
75
+ return out_path, speaker_audio_file
76
 
 
 
77
 
78
@spaces.GPU(enable_queue=True)
def generate(text, audio):
    """Synthesize `text` with the voice cloned from `audio`; return the wav path.

    Parameters
    ----------
    text : str
        Text to synthesize (language is fixed to Spanish, ``lang='es'``).
    audio : str
        Path to the reference speaker wav used for voice cloning.

    Returns
    -------
    str
        Filesystem path of the generated wav file.
    """
    # Loading the XTTS checkpoint from disk is expensive; do it only on the
    # first request instead of reloading the model on every call.
    # `load_model` populates the module-level `XTTS_MODEL` sentinel that
    # `run_tts` also checks.
    if XTTS_MODEL is None:
        load_model(model_path, config_path, vocab_path)
    out_path, _speaker_audio = run_tts(lang='es', tts_text=text, speaker_audio_file=audio)
    return out_path
83
 
84
  demo = gr.Interface(
85
  fn=generate,