Spaces:

bofenghuang
/

speech-to-text

Running

bofenghuang committed on Nov 4, 2022

Commit

fe32065

1 Parent(s): 694d4c4

add streaming mode

Files changed (4) hide show

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
-title: Tmp
-emoji: 📉
-colorFrom: gray
 colorTo: pink
 sdk: gradio
 sdk_version: 3.9

 ---
+title: Automatic Speech Recognition in French
+emoji: 👂
+colorFrom: blue
 colorTo: pink
 sdk: gradio
 sdk_version: 3.9

app.py CHANGED Viewed

@@ -3,16 +3,26 @@ import gradio as gr
 pipe = pipeline(model="bhuang/wav2vec2-xls-r-1b-cv9-fr")
-def transcribe(audio):
-    text = pipe(audio)["text"]
-    return text
 iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(source="microphone", type="filepath", label="Record something..."),
-    outputs="text",
-    title="Automatic Speech Recognition in French",
-    description="Realtime demo for French automatic speech recognition using a fine-tuned wav2vec2 model.",
 )
-iface.launch()

 pipe = pipeline(model="bhuang/wav2vec2-xls-r-1b-cv9-fr")
+def transcribe(audio, state=""):
+    text = pipe(audio, chunk_length_s=5, stride_length_s=1)["text"]
+    state += text + " "
+    return state, state
 iface = gr.Interface(
     fn=transcribe,
+    inputs=[
+        gr.Audio(source="microphone", type="filepath", streaming=True, label="Record something..."),
+        "state"
+    ],
+    outputs=[
+        "textbox",
+        "state"
+    ],
+    title="Realtime ASR in French",
+    # description="Realtime demo for French ASR using a fine-tuned wav2vec2 model.",
+    allow_flagging="never",
+    live=True
 )
+# iface.launch()
+iface.launch(server_name="0.0.0.0", share=True)

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ffmpeg

requirements.txt ADDED Viewed

+transformers
+torch
+pyctcdecode
+pypi-kenlm