Hugging Face Spaces — commit "Update app.py" (Space currently shows a build error); diff of app.py:
@@ -1,29 +1,27 @@
|
|
1 |
import gradio as gr
|
2 |
-
import tensorflow as tf
|
3 |
-
import transformers
|
4 |
-
|
5 |
-
|
6 |
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
|
9 |
import firebase_admin
|
10 |
from firebase_admin import credentials
|
11 |
from firebase_admin import firestore
|
12 |
import datetime
|
|
|
|
|
13 |
|
14 |
import tempfile
|
15 |
from typing import Optional
|
16 |
import numpy as np
|
17 |
from TTS.utils.manage import ModelManager
|
18 |
from TTS.utils.synthesizer import Synthesizer
|
19 |
-
|
20 |
-
import io, base64
|
21 |
-
import mediapy
|
22 |
-
import os
|
23 |
-
import sys
|
24 |
-
|
25 |
-
from PIL import Image
|
26 |
-
from huggingface_hub import snapshot_download
|
27 |
|
28 |
|
29 |
# firestore singleton is a cached multiuser instance to persist shared crowdsource memory
|
@@ -39,7 +37,31 @@ db = get_db_firestore()
|
|
39 |
|
40 |
# create ASR ML pipeline
|
41 |
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
# create Text Classification pipeline
|
45 |
classifier = pipeline("text-classification")
|
@@ -145,24 +167,27 @@ def generate_interpolation(gallery):
|
|
145 |
demo = gr.Blocks()
|
146 |
|
147 |
with demo:
|
148 |
-
|
149 |
-
# Left column (inputs)
|
150 |
-
# with gr.Column():
|
151 |
audio_file = gr.inputs.Audio(source="microphone", type="filepath")
|
152 |
text = gr.Textbox()
|
153 |
label = gr.Label()
|
154 |
saved = gr.Textbox()
|
155 |
-
savedAll = gr.Textbox()
|
156 |
-
|
|
|
|
|
157 |
b1 = gr.Button("Recognize Speech")
|
158 |
b2 = gr.Button("Classify Sentiment")
|
159 |
b3 = gr.Button("Save Speech to Text")
|
160 |
b4 = gr.Button("Retrieve All")
|
161 |
-
|
|
|
162 |
b1.click(speech_to_text, inputs=audio_file, outputs=text)
|
163 |
b2.click(text_to_sentiment, inputs=text, outputs=label)
|
164 |
b3.click(upsert, inputs=text, outputs=saved)
|
165 |
b4.click(selectall, inputs=text, outputs=savedAll)
|
|
|
|
|
166 |
|
167 |
with gr.Row():
|
168 |
# Left column (inputs)
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
2 |
from transformers import pipeline
|
3 |
+
import io, base64
|
4 |
+
from PIL import Image
|
5 |
+
import numpy as np
|
6 |
+
import tensorflow as tf
|
7 |
+
import mediapy
|
8 |
+
import os
|
9 |
+
import sys
|
10 |
+
from huggingface_hub import snapshot_download
|
11 |
|
12 |
+
import streamlit as st
|
13 |
import firebase_admin
|
14 |
from firebase_admin import credentials
|
15 |
from firebase_admin import firestore
|
16 |
import datetime
|
17 |
+
from transformers import pipeline
|
18 |
+
import gradio as gr
|
19 |
|
20 |
import tempfile
|
21 |
from typing import Optional
|
22 |
import numpy as np
|
23 |
from TTS.utils.manage import ModelManager
|
24 |
from TTS.utils.synthesizer import Synthesizer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
|
27 |
# firestore singleton is a cached multiuser instance to persist shared crowdsource memory
|
|
|
37 |
|
38 |
# create ASR ML pipeline
# Wav2Vec2 base checkpoint fine-tuned on 960h LibriSpeech: raw audio -> transcript.
asr = pipeline(
    "automatic-speech-recognition",
    "facebook/wav2vec2-base-960h",
)
|
40 |
+
|
41 |
+
# Catalog of Coqui TTS models, keyed as "<language>/<dataset>/<model-arch>".
MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    "en/sam/tacotron-DDC",
    "fr/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]

# Eagerly download every catalog entry at import time and keep a ready-to-use
# Synthesizer per model name so inference calls don't pay the download cost.
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
    print(f"downloading {MODEL_NAME}")
    model_path, config_path, model_item = manager.download_model(
        f"tts_models/{MODEL_NAME}"
    )
    # Some models ship with a companion vocoder; fetch it when one is declared.
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    vocoder_path = vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        vocoder_path,
        vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer
|
65 |
|
66 |
# create Text Classification pipeline
# No checkpoint specified, so transformers falls back to its default
# text-classification (sentiment) model.
classifier = pipeline(task="text-classification")
|
|
|
167 |
# Top-level Gradio Blocks container; components and click handlers are
# attached inside the `with demo:` suite that follows.
demo = gr.Blocks()
|
168 |
|
169 |
with demo:
|
170 |
+
|
|
|
|
|
171 |
audio_file = gr.inputs.Audio(source="microphone", type="filepath")
|
172 |
text = gr.Textbox()
|
173 |
label = gr.Label()
|
174 |
saved = gr.Textbox()
|
175 |
+
savedAll = gr.Textbox()
|
176 |
+
TTSchoice = gr.inputs.Radio( label="Pick a TTS Model", choices=MODEL_NAMES, )
|
177 |
+
audio = gr.Audio(label="Output", interactive=False)
|
178 |
+
|
179 |
b1 = gr.Button("Recognize Speech")
|
180 |
b2 = gr.Button("Classify Sentiment")
|
181 |
b3 = gr.Button("Save Speech to Text")
|
182 |
b4 = gr.Button("Retrieve All")
|
183 |
+
b5 = gr.Button("Read It Back Aloud")
|
184 |
+
|
185 |
b1.click(speech_to_text, inputs=audio_file, outputs=text)
|
186 |
b2.click(text_to_sentiment, inputs=text, outputs=label)
|
187 |
b3.click(upsert, inputs=text, outputs=saved)
|
188 |
b4.click(selectall, inputs=text, outputs=savedAll)
|
189 |
+
b5.click(tts, inputs=[text,TTSchoice], outputs=audio)
|
190 |
+
|
191 |
|
192 |
with gr.Row():
|
193 |
# Left column (inputs)
|