Spaces:

bofenghuang
/

speech-to-text

Running

bofenghuang committed on Nov 4, 2022

Commit

9844004

1 Parent(s): 07fa407

add upload

Files changed (4) hide show

app.py CHANGED Viewed

	@@ -1 +1 @@
1	- ~~run_demo~~.py


1	+ run_demo_microphone.py

run_demo_file.py ADDED Viewed

+# Gradio demo script: transcribe an uploaded audio file with a transformers pipeline.
+import logging
+import gradio as gr
+from transformers import pipeline
+# Configure the root handler's line format; the level is set on this module's logger below.
+# NOTE(review): datefmt appends a literal "Z", but nothing here switches the
+# formatter to UTC — confirm the timestamps are really UTC.
+logging.basicConfig(
+    format="%(asctime)s [%(levelname)s] [%(name)s] %(message)s",
+    datefmt="%Y-%m-%dT%H:%M:%SZ",
+)
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+# Build the pipeline once at import time so every call to transcribe() reuses it.
+pipe = pipeline(model="bhuang/wav2vec2-xls-r-1b-cv9-fr")
+def transcribe(audio):
+    # Run the pipeline on `audio` and return the "text" field of its result.
+    # `audio` is a file path here, since the Audio input below uses type="filepath".
+    # The commented-out call is an alternative that passes chunk/stride lengths.
+    # text = pipe(audio, chunk_length_s=30, stride_length_s=5)["text"]
+    text = pipe(audio)["text"]
+    logger.info(f"Transcription for {audio}: {text}")
+    return text
+# Single-input, single-output UI: uploaded audio in, transcription text out.
+iface = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(source="upload", type="filepath", label="Upload some audio file..."),
+    outputs="text",
+    title="Speech-to-Text in French",
+    # NOTE(review): the description says "Realtime", but this demo takes an
+    # uploaded file — confirm the wording is intended.
+    description="Realtime demo for French automatic speech recognition.",
+    allow_flagging="never",
+)
+# Alternative launch with an explicit bind address and debug mode, kept for reference.
+# iface.launch(server_name="0.0.0.0", debug=True, share=False)
+# Start the app with Gradio's default launch settings.
+iface.launch()

run_demo.py → run_demo_microphone.py RENAMED Viewed

File without changes

run_demo_streaming.py → run_demo_microphone_streaming.py RENAMED Viewed

@@ -8,7 +8,7 @@ logging.basicConfig(
     datefmt="%Y-%m-%dT%H:%M:%SZ",
 )
 logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
 pipe = pipeline(model="bhuang/wav2vec2-xls-r-1b-cv9-fr")

     datefmt="%Y-%m-%dT%H:%M:%SZ",
 )
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
 pipe = pipeline(model="bhuang/wav2vec2-xls-r-1b-cv9-fr")