riteshkr committed on
Commit
6422215
1 Parent(s): 68dec23

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +47 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+
# Checkpoint served by this demo; replace with another model path or Hub ID as needed.
model_id = "riteshkr/whisper-large-v3-quantized"

# Build the speech-recognition pipeline once at import time so every request reuses it.
pipe = pipeline(task="automatic-speech-recognition", model=model_id)
7
+
# Define the transcription function
def transcribe_speech(filepath):
    """Transcribe an audio file to English text using the module-level ``pipe``.

    Args:
        filepath: Path to the audio file to transcribe. An empty value
            (e.g. ``None`` from a submit with no recording or upload) is
            rejected with a user-facing error instead of being forwarded
            to the pipeline.

    Returns:
        The ``"text"`` field of the pipeline output.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not filepath:
        raise gr.Error("No audio found. Please record or upload a clip and try again.")

    output = pipe(
        filepath,
        generate_kwargs={
            "task": "transcribe",
            # Update the language as per your model's fine-tuning
            "language": "english",
            # Passed here rather than as a top-level pipeline argument, which is
            # deprecated; generate_kwargs is forwarded to the model's generate().
            "max_new_tokens": 256,
        },
        chunk_length_s=30,  # long audio is processed in 30-second chunks
        batch_size=8,
    )
    return output["text"]
21
+
# Define the Gradio interface for microphone input
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    # Gradio 4 replaced the `source` keyword with `sources` (a list of allowed
    # inputs); requirements.txt leaves gradio unpinned, so target the current API.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=gr.Textbox(),
)
28
+
# Define the Gradio interface for file upload input
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    # Gradio 4 replaced the `source` keyword with `sources` (a list of allowed
    # inputs); requirements.txt leaves gradio unpinned, so target the current API.
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Textbox(),
)
35
+
# Host both interfaces as tabs inside a single Blocks app
with gr.Blocks() as demo:
    gr.TabbedInterface(
        interface_list=[mic_transcribe, file_transcribe],
        tab_names=["Transcribe Microphone", "Transcribe Audio File"],
    )

# Start the server only when executed as a script, with debug mode on and a
# public share link requested
if __name__ == "__main__":
    demo.launch(debug=True, share=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch