Runtime error
Upload app.py
app.py
CHANGED
@@ -6,23 +6,26 @@ from diffusers import StableDiffusionPipeline
 
 
 MODEL_NAME = "whispy/whisper_italian"
-
-
-
-    model="it5/it5-efficient-small-el32-news-summarization",
-)
-
-pipe = pipeline(
+YOUR_TOKEN = "hf_..."
+# whisper model fine-tuned for italian
+speech_ppl = pipeline(
     task="automatic-speech-recognition",
     model=MODEL_NAME,
     chunk_length_s=30,
-    device="cpu"
-)
-
-
-
-
-
+    device="cpu"
+)
+# model summarizing text
+summarizer_ppl = pipeline(
+    "summarization",
+    model="it5/it5-efficient-small-el32-news-summarization"
+)
+# model translating text from Italian to English
+translator_ppl = pipeline(
+    "translation",
+    model="Helsinki-NLP/opus-mt-it-en"
+)
+# model producing an image from text
+image_ppl = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=YOUR_TOKEN)
 
 def transcribe(microphone, file_upload):
     warn_output = ""
@@ -37,12 +40,15 @@ def transcribe(microphone, file_upload):
 
     file = microphone if microphone is not None else file_upload
 
-    text =
-
-    translate =
+    text = speech_ppl(file)["text"]
+    print("Text: ", text)
+    translate = translator_ppl(text)
+    print("Translate: ", translate)
     translate = translate[0]["translation_text"]
-
-    image =
+    print("Translate 2: ", translate)
+    image = image_ppl(translate).images[0]
+    print("Image: ", image)
+    image.save("text-to-image.png")
 
     return warn_output + text, translate, image
 
@@ -80,7 +86,9 @@ mf_transcribe = gr.Interface(
         gr.inputs.Audio(source="microphone", type="filepath", optional=True),
         gr.inputs.Audio(source="upload", type="filepath", optional=True),
     ],
-    outputs=["
+    outputs=[gr.Textbox(label="Transcribed text"),
+             gr.Textbox(label="Summarized text"),
+             gr.Image(type="pil", label="Output image")],
     layout="horizontal",
    theme="huggingface",
    title="Whisper Demo: Transcribe Audio",
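
For readability, the pipeline setup and transcribe() introduced by this commit can be assembled into one self-contained sketch. It is not part of the diff: the os import, the HF_TOKEN environment lookup (used here instead of the hard-coded token in the commit), and the comments are assumptions added for illustration; the model names and call chain are taken from the diff as-is.

import os

from transformers import pipeline
from diffusers import StableDiffusionPipeline

MODEL_NAME = "whispy/whisper_italian"
# assumption: read the access token from the environment instead of hard-coding it
HF_TOKEN = os.environ.get("HF_TOKEN")

# whisper model fine-tuned for Italian speech recognition
speech_ppl = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device="cpu",
)

# Italian news summarization model (created in the commit but not called in transcribe())
summarizer_ppl = pipeline(
    "summarization",
    model="it5/it5-efficient-small-el32-news-summarization",
)

# Italian-to-English translation model
translator_ppl = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-it-en",
)

# text-to-image model; needs a valid Hugging Face token for the gated checkpoint
image_ppl = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    use_auth_token=HF_TOKEN,
)


def transcribe(microphone, file_upload):
    warn_output = ""
    # prefer the microphone recording, fall back to the uploaded file
    file = microphone if microphone is not None else file_upload

    # 1. transcribe the Italian audio to text
    text = speech_ppl(file)["text"]
    # 2. translate the Italian transcript to English
    translate = translator_ppl(text)
    translate = translate[0]["translation_text"]
    # 3. generate an image from the English text
    image = image_ppl(translate).images[0]
    image.save("text-to-image.png")

    return warn_output + text, translate, image

Loading CompVis/stable-diffusion-v1-4 at import time and running it on CPU is heavy for a typical Space, which may be related to the "Runtime error" status shown above.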