Spaces:

riteshkr
/

transcribe-using-q-whi-L-v3

Sleeping

riteshkr committed on Sep 12

Commit

6422215

•

1 Parent(s): 68dec23

Upload 2 files

Files changed (2) hide show

app.py ADDED Viewed

+import gradio as gr
+from transformers import pipeline
+# Load the ASR model using the Hugging Face pipeline
+model_id = "riteshkr/whisper-large-v3-quantized"  # Update with your model path or ID
+pipe = pipeline("automatic-speech-recognition", model=model_id)
+# Define the transcription function
+def transcribe_speech(filepath):
+    output = pipe(
+        filepath,
+        max_new_tokens=256,
+        generate_kwargs={
+            "task": "transcribe",
+            "language": "english",
+        },  # Update the language as per your model's fine-tuning
+        chunk_length_s=30,
+        batch_size=8,
+    )
+    return output["text"]
+# Define the Gradio interface for microphone input
+mic_transcribe = gr.Interface(
+    fn=transcribe_speech,
+    inputs=gr.Audio(source="microphone", type="filepath"),
+    outputs=gr.Textbox(),
+)
+# Define the Gradio interface for file upload input
+file_transcribe = gr.Interface(
+    fn=transcribe_speech,
+    inputs=gr.Audio(source="upload", type="filepath"),
+    outputs=gr.Textbox(),
+)
+# Creating the tabbed layout using Blocks
+demo = gr.Blocks()
+with demo:
+    gr.TabbedInterface(
+        [mic_transcribe, file_transcribe],
+        ["Transcribe Microphone", "Transcribe Audio File"],
+    )
+# Launch the app with debugging enabled
+if __name__ == "__main__":
+    demo.launch(debug=True, share=True)

requirements.txt ADDED Viewed

+gradio
+transformers
+torch