amithm3 committed on
Commit
5844c7a
·
verified ·
1 Parent(s): 08c8492

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -1
app.py CHANGED
@@ -1,3 +1,31 @@
 
1
  import gradio as gr
 
2
 
3
- gr.load("models/amithm3/whisper-medium").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import WhisperProcessor, WhisperFeatureExtractor, WhisperForConditionalGeneration
2
  import gradio as gr
3
+ import torchaudio
4
 
5
# Hugging Face Hub repo id of the fine-tuned checkpoint.
# NOTE: the "models/" prefix is only meaningful to gr.load(); from_pretrained()
# expects a plain "namespace/name" repo id (or an existing local directory),
# so the prefix must not be included here.
mdl = "amithm3/whisper-medium"

# Processor bundles the feature extractor and tokenizer used by transcribe().
processor = WhisperProcessor.from_pretrained(mdl, task="transcribe")
# Not referenced by the code visible in this file; kept so the module-level
# name remains available to anything that imports it.
feature_extractor = WhisperFeatureExtractor.from_pretrained(mdl, task="transcribe")
model = WhisperForConditionalGeneration.from_pretrained(mdl)

# Sample rate (Hz) that uploaded audio is resampled to before feature extraction.
sampling_rate = 16000
10
+
11
+
12
def transcribe(audio, language):
    """Transcribe an audio file to text with the loaded Whisper model.

    Args:
        audio: Path to an audio file readable by ``torchaudio.load``, or
            ``None`` when no audio was supplied.
        language: Language name to force during generation (for example
            ``"kannada"``), or ``None`` to leave the language unset.

    Returns:
        The decoded transcription string, or an empty string when ``audio``
        is ``None``.
    """
    # Nothing to transcribe; avoid an opaque failure inside torchaudio.load().
    if audio is None:
        return ""

    waveform, orig_freq = torchaudio.load(audio)
    # torchaudio.load returns a (channels, frames) tensor. Average the channels
    # so multi-channel input becomes a single mono signal; a bare squeeze()
    # would leave stereo audio 2-D, which the feature extractor would treat as
    # a batch of separate recordings and only the first would be returned.
    waveform = waveform.mean(dim=0)
    waveform = torchaudio.functional.resample(
        waveform, orig_freq=orig_freq, new_freq=sampling_rate
    )

    input_features = processor(
        waveform.numpy(), sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features

    # Set explicitly on every call so a previous request's language (or a
    # language stored with the checkpoint) does not leak into this one.
    model.generation_config.language = language
    predicted_ids = model.generate(input_features)

    # A single input yields a single decoded sequence.
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
21
+
22
+
23
# Input widgets, in the positional order expected by transcribe(audio, language).
# type="filepath" makes Gradio hand the recording/upload over as a file path.
audio_input = gr.Audio(type="filepath")
# The None choice is passed through to transcribe() as language=None.
language_input = gr.Dropdown(
    ["kannada", "english", None],
    label="Language",
    value="kannada",
)

# Web UI wiring: two inputs in, plain-text transcription out.
iface = gr.Interface(
    fn=transcribe,
    inputs=[audio_input, language_input],
    outputs="text",
    title="Whisper Medium Indic",
    description="Realtime demo for Indic speech recognition using a fine-tuned Whisper Medium model.",
)

iface.launch()