rayl-aoit committed on
Commit
3c2c1d3
1 Parent(s): 401a0de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -19
app.py CHANGED
@@ -16,16 +16,10 @@ SAMPLE_RATE = 16000 # Hz
16
 
17
  # load ASR model
18
  canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
19
-
20
- # update dcode params
21
  decode_cfg = canary_model.cfg.decoding
22
  decode_cfg.beam.beam_size = 1
23
  canary_model.change_decoding_strategy(decode_cfg)
24
 
25
- # load TTS model
26
- # tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
27
- # tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
28
-
29
  # Function to convert audio to text using ASR
30
  def gen_text(audio_filepath, action, source_lang, target_lang):
31
  if audio_filepath is None:
@@ -72,7 +66,6 @@ def gen_text(audio_filepath, action, source_lang, target_lang):
72
  # Function to convert text to speech using TTS
73
  def gen_speech(text, lang):
74
  set_seed(555) # Make it deterministic
75
-
76
  match lang:
77
  case "en":
78
  model = "facebook/mms-tts-eng"
@@ -85,11 +78,6 @@ def gen_speech(text, lang):
85
  case _:
86
  model = "facebook/mms-tts-eng"
87
 
88
- # if lang=="en":
89
- # model = "facebook/mms-tts-eng"
90
- # elif lang=="fr":
91
- # model = "facebook/mms-tts-fra"
92
-
93
  # load TTS model
94
  tts_model = VitsModel.from_pretrained(model)
95
  tts_tokenizer = AutoTokenizer.from_pretrained(model)
@@ -146,13 +134,18 @@ with playground:
146
  with gr.Column():
147
  clear_button = gr.ClearButton(components=[input_audio, source_lang, target_lang, transcipted_text, translated_text, translated_speech], value="Clear")
148
 
149
- # with gr.Row():
150
- # gr.Examples(
151
- # examples=["sample.wav"],
152
- # inputs=[input_audio],
153
- # outputs=[transcipted_text, translated_speech, translated_text],
154
- # run_on_click=True, cache_examples=True, fn=start_process
155
- # )
 
 
 
 
 
156
 
157
  submit_button.click(start_process, inputs=[input_audio, source_lang, target_lang], outputs=[transcipted_text, translated_text, translated_speech])
158
 
 
16
 
17
  # load ASR model
18
  canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
 
 
19
  decode_cfg = canary_model.cfg.decoding
20
  decode_cfg.beam.beam_size = 1
21
  canary_model.change_decoding_strategy(decode_cfg)
22
 
 
 
 
 
23
  # Function to convert audio to text using ASR
24
  def gen_text(audio_filepath, action, source_lang, target_lang):
25
  if audio_filepath is None:
 
66
  # Function to convert text to speech using TTS
67
  def gen_speech(text, lang):
68
  set_seed(555) # Make it deterministic
 
69
  match lang:
70
  case "en":
71
  model = "facebook/mms-tts-eng"
 
78
  case _:
79
  model = "facebook/mms-tts-eng"
80
 
 
 
 
 
 
81
  # load TTS model
82
  tts_model = VitsModel.from_pretrained(model)
83
  tts_tokenizer = AutoTokenizer.from_pretrained(model)
 
134
  with gr.Column():
135
  clear_button = gr.ClearButton(components=[input_audio, source_lang, target_lang, transcipted_text, translated_text, translated_speech], value="Clear")
136
 
137
+ with gr.Row():
138
+ gr.Examples(
139
+ examples=[
140
+ ["sample_en.wav","en","fr"],
141
+ ["sample_fr.wav","fr","de"],
142
+ ["sample_de.wav","de","es"],
143
+ ["sample_es.wav","es","en"]
144
+ ],
145
+ inputs=[input_audio, source_lang, target_lang],
146
+ outputs=[transcipted_text, translated_text, translated_speech],
147
+ run_on_click=True, cache_examples=True, fn=start_process
148
+ )
149
 
150
  submit_button.click(start_process, inputs=[input_audio, source_lang, target_lang], outputs=[transcipted_text, translated_text, translated_speech])
151