Spaces:

projecte-aina
/

transcripcio-fonetica-catala

Running

ccoreilly committed on Apr 30, 2023

Commit

2c3e79f

•

1 Parent(s): 4e5413c

return numpy arrays

Files changed (2) hide show

app.py CHANGED Viewed

@@ -54,16 +54,15 @@ def tts(text, speaker_idx):
         print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
     print(text)
-    speakers_maping_path = os.getcwd() + "/speaker_map.json"
-    # Map speaker aliases to speaker ids
-    with open(speakers_maping_path, 'r') as fp:
-        maping = json.load(fp)
-    #speaker_idx = maping[speaker_idx]
-    # synthesize
-    wavs = model_bsc.tts(text, speaker_idx)
     # return output
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         model_bsc.save_wav(wavs, fp)
@@ -82,13 +81,17 @@ article= ""
 iface = gr.Interface(
     fn=tts,
     inputs=[
-        gr.inputs.Textbox(
             label="Text",
             default="L'Èlia i l'Alí a l'aula.  L'oli i l'ou.  Lulú olorava la lila.",
         ),
-        gr.inputs.Dropdown(label="Selecciona un parlant", choices=SPEAKERS, default=None)
     ],
-    outputs=gr.outputs.Audio(label="Output",type="filepath"),
     title="🗣️ TTS Català Multi Parlant - VITS 🗣️",
     description=description,
     article=article,

         print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
     print(text)
+    # synthesize
+    wav_bsc = model_bsc.tts(text, speaker_idx)
+    wav_coll = model_collectivat.tts(text)
+    wav_piper = model_piper.synthesize(text)
+    return (model_bsc.tts_config.audio["sample_rate"], wav_bsc),
+         (22000, wav_coll),
+         (16000, wav_piper)
     # return output
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         model_bsc.save_wav(wavs, fp)
 iface = gr.Interface(
     fn=tts,
     inputs=[
+        gr.Textbox(
             label="Text",
             default="L'Èlia i l'Alí a l'aula.  L'oli i l'ou.  Lulú olorava la lila.",
         ),
+        gr.Dropdown(label="Selecciona un parlant", choices=SPEAKERS, default=None)
+    ],
+    outputs=[
+        gr.Audio(label="BSC VITS",type="numpy"),
+        gr.Audio(label="Collectivat Fastspeech",type="numpy"),
+        gr.Audio(label="Piper VITS",type="numpy")
     ],
     title="🗣️ TTS Català Multi Parlant - VITS 🗣️",
     description=description,
     article=article,

engine.py CHANGED Viewed

@@ -104,7 +104,7 @@ class Piper:
             },
         )[0].squeeze((0, 1))
         audio = audio_float_to_int16(audio.squeeze())
         # Convert to WAV
         with io.BytesIO() as wav_io:
             wav_file: wave.Wave_write = wave.open(wav_io, "wb")

             },
         )[0].squeeze((0, 1))
         audio = audio_float_to_int16(audio.squeeze())
+        return audio
         # Convert to WAV
         with io.BytesIO() as wav_io:
             wav_file: wave.Wave_write = wave.open(wav_io, "wb")