Spaces:

rayl-aoit
/

translate_text_and_speech

Running

App Files Community

rayl-aoit committed on Jul 8

Commit

d0aa8cd

•

1 Parent(s): 219e01a

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -8

app.py CHANGED Viewed

@@ -27,7 +27,7 @@ tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
 tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
 # Function to convert audio to text using ASR
-def transcribe(audio_filepath):
     if audio_filepath is None:
         raise gr.Error("Please provide some input audio.")
@@ -44,9 +44,9 @@ def transcribe(audio_filepath):
         duration = len(data) / SAMPLE_RATE
         manifest_data = {
             "audio_filepath": converted_audio_filepath,
-            "taskname": "asr",
             "source_lang": "en",
-            "target_lang": "en",
             "pnc": "no",
             "answer": "predict",
             "duration": str(duration),
@@ -56,9 +56,9 @@ def transcribe(audio_filepath):
             fout.write(json.dumps(manifest_data))
         if duration < 40:
-            transcription = canary_model.transcribe(manifest_filepath)[0]
         else:
-            transcription = get_buffered_pred_feat_multitaskAED(
                 frame_asr,
                 canary_model.cfg.preprocessor,
                 model_stride_in_secs,
@@ -66,7 +66,7 @@ def transcribe(audio_filepath):
                 manifest=manifest_filepath,
             )[0].text
-    return transcription
 # Function to convert text to speech using TTS
 def gen_speech(text):
@@ -81,9 +81,10 @@ def gen_speech(text):
 # Root function for Gradio interface
 def start_process(audio_filepath):
-    transcription = transcribe(audio_filepath)
     print("Done transcribing")
-    translation = "working in progress"
     audio_output_filepath = gen_speech(transcription)
     print("Done speaking")
     return transcription, translation, audio_output_filepath

 tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
 # Function to convert audio to text using ASR
+def gen_text(audio_filepath, action):
     if audio_filepath is None:
         raise gr.Error("Please provide some input audio.")
         duration = len(data) / SAMPLE_RATE
         manifest_data = {
             "audio_filepath": converted_audio_filepath,
+            "taskname": action,
             "source_lang": "en",
+            "target_lang": "en" if action=="asr" else "fr",
             "pnc": "no",
             "answer": "predict",
             "duration": str(duration),
             fout.write(json.dumps(manifest_data))
         if duration < 40:
+            predicted_text = canary_model.transcribe(manifest_filepath)[0]
         else:
+            predicted_text = get_buffered_pred_feat_multitaskAED(
                 frame_asr,
                 canary_model.cfg.preprocessor,
                 model_stride_in_secs,
                 manifest=manifest_filepath,
             )[0].text
+    return predicted_text
 # Function to convert text to speech using TTS
 def gen_speech(text):
 # Root function for Gradio interface
 def start_process(audio_filepath):
+    transcription = gen_text(audio_filepath, "asr")
     print("Done transcribing")
+    translation = gen_text(audio_filepath, "ast")
+    print("Done translation")
     audio_output_filepath = gen_speech(transcription)
     print("Done speaking")
     return transcription, translation, audio_output_filepath