Spaces:

rayl-aoit
/

translate_text_and_speech

Running

rayl-aoit committed on Jul 8

Commit

6d345cb

•

1 Parent(s): e5ddf72

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,6 +25,8 @@ canary_model.change_decoding_strategy(decode_cfg)
 # load TTS model
 tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
 tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
 # Function to convert audio to text using ASR
 def gen_text(audio_filepath, action):
@@ -71,9 +73,9 @@ def gen_text(audio_filepath, action):
 # Function to convert text to speech using TTS
 def gen_speech(text):
     set_seed(555)  # Make it deterministic
-    input_text = tts_tokenizer(text, return_tensors="pt")
     with torch.no_grad():
-        outputs = tts_model(**input_text)
     waveform_np = outputs.waveform[0].cpu().numpy()
     output_file = f"{str(uuid.uuid4())}.wav"
     wav.write(output_file, rate=tts_model.config.sampling_rate, data=waveform_np)
@@ -112,13 +114,13 @@ with playground:
         with gr.Column():
             clear_button = gr.ClearButton(components=[input_audio, transcipted_text, translated_speech, translated_text], value="Clear")
-    with gr.Row():
-        gr.Examples(
-            examples=["sample.wav"],
-            inputs=[input_audio],
-            outputs=[transcipted_text, translated_speech, translated_text],
-            run_on_click=True, cache_examples=True, fn=start_process
-        )
     submit_button.click(start_process, inputs=[input_audio], outputs=[transcipted_text, translated_text, translated_speech])

 # load TTS model
 tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
 tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
+tts_fra_model = VitsModel.from_pretrained("facebook/mms-tts-fra")
+tts_fra_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-fra")
 # Function to convert audio to text using ASR
 def gen_text(audio_filepath, action):
 # Function to convert text to speech using TTS
 def gen_speech(text):
     set_seed(555)  # Make it deterministic
+    input_text = tts_fra_tokenizer(text, return_tensors="pt")
     with torch.no_grad():
+        outputs = tts_fra_model(**input_text)
     waveform_np = outputs.waveform[0].cpu().numpy()
     output_file = f"{str(uuid.uuid4())}.wav"
     wav.write(output_file, rate=tts_model.config.sampling_rate, data=waveform_np)
         with gr.Column():
             clear_button = gr.ClearButton(components=[input_audio, transcipted_text, translated_speech, translated_text], value="Clear")
+    # with gr.Row():
+    #     gr.Examples(
+    #         examples=["sample.wav"],
+    #         inputs=[input_audio],
+    #         outputs=[transcipted_text, translated_speech, translated_text],
+    #         run_on_click=True, cache_examples=True, fn=start_process
+    #     )
     submit_button.click(start_process, inputs=[input_audio], outputs=[transcipted_text, translated_text, translated_speech])