Spaces: Running on T4
Add second example #4
by sanchit-gandhi (HF staff) - opened

- app.py +19 -5
- assets/sample_input_2.mp3 +3 -0
app.py
CHANGED
@@ -1,5 +1,6 @@
 import os
 
+from typing import Union
 import gradio as gr
 import numpy as np
 import torch
@@ -48,12 +49,12 @@ translator = Translator(
 def predict(
     task_name: str,
     audio_source: str,
-    input_audio_mic: str | None,
-    input_audio_file: str | None,
-    input_text: str | None,
-    source_language: str | None,
+    input_audio_mic: Union[str, None],
+    input_audio_file: Union[str, None],
+    input_text: Union[str, None],
+    source_language: Union[str, None],
     target_language: str,
-) -> tuple[tuple[int, np.ndarray] | None, str]:
+) -> tuple[Union[tuple[int, np.ndarray], None], str]:
     task_name = task_name.split()[0]
     source_language_code = LANGUAGE_NAME_TO_CODE.get(source_language, None)
     target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
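Note on the signature change: swapping the PEP 604 spelling str | None for typing.Union is presumably about runtime compatibility, since str | None in a function signature raises a TypeError when the annotation is evaluated on Python older than 3.10, while Union[str, None] works on any modern version. A minimal sketch of the equivalence (predict_stub is a hypothetical stand-in, not the app's actual function):

    from typing import Optional, Union

    # Union[str, None] and Optional[str] are the same type; both are the
    # pre-3.10 spelling of what `str | None` means on newer Pythons.
    assert Union[str, None] == Optional[str]

    def predict_stub(input_audio_mic: Union[str, None]) -> Union[str, None]:
        # Hypothetical stand-in: echoes the (possibly absent) audio path.
        return input_audio_mic

    print(predict_stub("assets/sample_input_2.mp3"))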
@@ -290,6 +291,8 @@ with gr.Blocks(css="style.css") as demo:
     examples=[
         ["assets/sample_input.mp3", "French"],
         ["assets/sample_input.mp3", "Mandarin Chinese"],
+        ["assets/sample_input_2.mp3", "Hindi"],
+        ["assets/sample_input_2.mp3", "Spanish"],
     ],
     inputs=[input_audio_file, target_language],
     outputs=[output_audio, output_text],
@@ -301,6 +304,8 @@ with gr.Blocks(css="style.css") as demo:
     examples=[
         ["assets/sample_input.mp3", "French"],
         ["assets/sample_input.mp3", "Mandarin Chinese"],
+        ["assets/sample_input_2.mp3", "Hindi"],
+        ["assets/sample_input_2.mp3", "Spanish"],
     ],
     inputs=[input_audio_file, target_language],
     outputs=[output_audio, output_text],
@@ -312,6 +317,10 @@ with gr.Blocks(css="style.css") as demo:
     examples=[
         ["My favorite animal is the elephant.", "English", "French"],
         ["My favorite animal is the elephant.", "English", "Mandarin Chinese"],
+        ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+         "English", "Hindi"],
+        ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+         "English", "Spanish"],
     ],
     inputs=[input_text, source_language, target_language],
     outputs=[output_audio, output_text],
@@ -323,6 +332,10 @@ with gr.Blocks(css="style.css") as demo:
     examples=[
         ["My favorite animal is the elephant.", "English", "French"],
         ["My favorite animal is the elephant.", "English", "Mandarin Chinese"],
+        ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+         "English", "Hindi"],
+        ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+         "English", "Spanish"],
     ],
     inputs=[input_text, source_language, target_language],
     outputs=[output_audio, output_text],
@@ -333,6 +346,7 @@ with gr.Blocks(css="style.css") as demo:
 asr_examples = gr.Examples(
     examples=[
         ["assets/sample_input.mp3", "English"],
+        ["assets/sample_input_2.mp3", "English"],
     ],
     inputs=[input_audio_file, target_language],
     outputs=[output_audio, output_text],
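For context, each gr.Examples block touched above renders its example rows as clickable presets that populate the listed input components. A minimal self-contained sketch of the pattern (the component names and translate_stub are hypothetical, not the Space's real layout):

    import gradio as gr

    def translate_stub(text, source, target):
        # Hypothetical stand-in for the Space's predict().
        return f"[{source} -> {target}] {text}"

    with gr.Blocks() as demo:
        input_text = gr.Textbox(label="Input text")
        source_language = gr.Dropdown(["English"], label="Source language")
        target_language = gr.Dropdown(["French", "Hindi", "Spanish"], label="Target language")
        output_text = gr.Textbox(label="Translation")
        # Clicking a row fills the three inputs with the row's values.
        gr.Examples(
            examples=[
                ["My favorite animal is the elephant.", "English", "French"],
                ["My favorite animal is the elephant.", "English", "Hindi"],
            ],
            inputs=[input_text, source_language, target_language],
        )
        gr.Button("Translate").click(
            translate_stub,
            inputs=[input_text, source_language, target_language],
            outputs=output_text,
        )

    demo.launch()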
assets/sample_input_2.mp3
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a505a4641e3f5f0ddec9508832793aa20e63d2545530b66bc04a9bd19a742e6
+size 30624
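The committed lines are a Git LFS pointer rather than the audio itself: they record the pointer-spec version, the SHA-256 of the real file content, and its size in bytes, while the bytes live in LFS storage. A hedged sketch of verifying a locally fetched copy against this pointer (the local path assumes the file was pulled into the working tree):

    import hashlib
    from pathlib import Path

    # Assumes the real audio was fetched, e.g. after `git lfs pull`.
    audio = Path("assets/sample_input_2.mp3").read_bytes()

    # The pointer pins the content by size and SHA-256; check both.
    assert len(audio) == 30624
    assert hashlib.sha256(audio).hexdigest() == (
        "6a505a4641e3f5f0ddec9508832793aa20e63d2545530b66bc04a9bd19a742e6"
    )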