arampacha committed on
Commit
c369354
1 Parent(s): 6b3bdcb

add example

Browse files
Files changed (3) hide show
  1. app.py +22 -17
  2. examples/dobryi_den.wav +0 -0
  3. requirements.txt +2 -1
app.py CHANGED
@@ -3,6 +3,11 @@ from librosa import to_mono, resample
3
  import numpy as np
4
  import gradio as gr
5
 
 
 
 
 
 
6
  model_id = "arampacha/wav2vec2-xls-r-1b-uk"
7
 
8
  processor = Wav2Vec2ProcessorWithLM.from_pretrained(model_id)
@@ -12,22 +17,20 @@ asr = pipeline(
12
  feature_extractor=processor.feature_extractor, decoder=processor.decoder
13
  )
14
  def run_asr(audio):
15
- try:
16
- sr, audio_array = audio
17
- audio_array = audio_array.astype(np.float32)
18
- if len(audio_array.shape) > 1:
19
- if audio_array.shape[1] == 1:
20
- audio_array = audio_array.squeeze()
21
- elif audio_array.shape[1] == 2:
22
- audio_array = to_mono(audio_array.T)
23
- else:
24
- raise ValueError("Audio with > 2 channels not supported")
25
- if sr != 16_000:
26
- audio_array = resample(audio_array, orig_sr=sr, target_sr=16_000)
27
- res = asr(audio_array, chunk_length_s=20, stride_length_s=2)["text"]
28
- except Exception as e:
29
- res = e
30
- return res
31
 
32
  text_out = gr.outputs.Textbox(label="transcript")
33
  interface = gr.Interface(
@@ -37,7 +40,9 @@ interface = gr.Interface(
37
  layout="horizontal",
38
  theme="huggingface",
39
  title="Speech-to-text Ukrainian",
40
- flagging_options=["incorrect"]
 
 
41
  )
42
 
43
  interface.launch(debug=True)
 
3
  import numpy as np
4
  import gradio as gr
5
 
6
+ DESC = """\
7
+ Ukrainian speech recognition app/
8
+ Розпізнавання голосу для української мови
9
+ """
10
+
11
  model_id = "arampacha/wav2vec2-xls-r-1b-uk"
12
 
13
  processor = Wav2Vec2ProcessorWithLM.from_pretrained(model_id)
 
17
  feature_extractor=processor.feature_extractor, decoder=processor.decoder
18
  )
19
  def run_asr(audio):
20
+ sr, audio_array = audio
21
+ audio_array = audio_array.astype(np.float32)
22
+ if len(audio_array.shape) > 1:
23
+ if audio_array.shape[1] == 1:
24
+ audio_array = audio_array.squeeze()
25
+ elif audio_array.shape[1] == 2:
26
+ audio_array = to_mono(audio_array.T)
27
+ else:
28
+ raise ValueError("Audio with > 2 channels not supported")
29
+ if sr != 16_000:
30
+ audio_array = resample(audio_array, orig_sr=sr, target_sr=16_000)
31
+ res = asr(audio_array, chunk_length_s=20, stride_length_s=2)
32
+
33
+ return res["text"]
 
 
34
 
35
  text_out = gr.outputs.Textbox(label="transcript")
36
  interface = gr.Interface(
 
40
  layout="horizontal",
41
  theme="huggingface",
42
  title="Speech-to-text Ukrainian",
43
+ description=DESC,
44
+ flagging_options=["incorrect"],
45
+ examples=["examples/dobryi_den.wav"]
46
  )
47
 
48
  interface.launch(debug=True)
examples/dobryi_den.wav ADDED
Binary file (317 kB). View file
 
requirements.txt CHANGED
@@ -3,4 +3,5 @@ torch==1.10.2+cpu
3
  librosa
4
  transformers
5
  pypi-kenlm
6
- pyctcdecode
 
 
3
  librosa
4
  transformers
5
  pypi-kenlm
6
+ pyctcdecode
7
+ ffprobe