h4d35 committed on
Commit
cdaf851
·
1 Parent(s): 93b07eb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
2
+ import soundfile as sf
3
+ import torch
4
+ import gradio as gr
5
+
6
+
7
# Hub checkpoint shared by the processor and the CTC model.
MODEL_ID = "h4d35/Wav2Vec2-hi"

# Load the processor and the model once, at import time.
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
10
+
11
+ # define function to read in sound file
12
def map_to_array(file):
    """Read an audio file and return its samples as a 1-D array.

    Args:
        file: Path or file-like object accepted by ``soundfile.read``.

    Returns:
        The decoded samples as a one-dimensional array. Multi-channel
        recordings are down-mixed to mono by averaging the channels.
    """
    # The sample rate reported by soundfile is discarded.
    speech, _ = sf.read(file)
    # soundfile yields shape (frames, channels) for multi-channel audio;
    # the speech model works on a single waveform, so collapse to mono.
    if speech.ndim > 1:
        speech = speech.mean(axis=1)
    return speech
15
+
16
+
17
+
18
+ # tokenize
19
def inference(audio):
    """Transcribe an uploaded audio file to text.

    Args:
        audio: The uploaded audio, either an object exposing its on-disk
            path via ``.name`` or a plain path string.

    Returns:
        The decoded transcription of the recording as a string.
    """
    # Accept both temp-file wrappers (``.name``) and bare path strings.
    audio_path = getattr(audio, "name", audio)
    speech = map_to_array(audio_path)

    # Tokenize; the result is a batch holding a single example.
    input_values = processor(
        speech, return_tensors="pt", padding="longest"
    ).input_values

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy decoding: most likely token per frame, then decode to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0]
29
+
30
# Gradio widgets: an uploaded audio file in, a text box out.
inputs = gr.inputs.Audio(label="Input Audio", type="file")
outputs = gr.outputs.Textbox(label="Output Text")

# Text shown at the top of the demo page.
title = "HindiASR"
description = "HindiASR using Wav2Vec2.0"

# Optional example clip, currently disabled: examples=[['poem.wav']]
demo = gr.Interface(
    fn=inference,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
)
demo.launch()