csukuangfj committed on
Commit
7bb5ae7
·
1 Parent(s): 66b4229

Output error information and RTF.

Browse files
Files changed (1) hide show
  1. app.py +92 -9
app.py CHANGED
@@ -40,13 +40,80 @@ def convert_to_wav(in_filename: str) -> str:
40
  return out_filename
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def process(
44
  in_filename: str,
45
  language: str,
46
  repo_id: str,
47
  decoding_method: str,
48
  num_active_paths: int,
49
- ) -> str:
50
  logging.info(f"in_filename: {in_filename}")
51
  logging.info(f"language: {language}")
52
  logging.info(f"repo_id: {repo_id}")
@@ -88,11 +155,16 @@ def process(
88
  rtf = (end - start) / duration
89
 
90
  logging.info(f"Finished at {date_time} s. Elapsed: {end - start: .3f} s")
91
- logging.info(f"Duration {duration: .3f} s")
92
- logging.info(f"RTF {rtf: .3f}")
 
 
 
 
 
93
  logging.info(f"hyp:\n{hyp}")
94
 
95
- return hyp
96
 
97
 
98
  title = "# Automatic Speech Recognition with Next-gen Kaldi"
@@ -107,6 +179,15 @@ See more information by visiting the following links:
107
  - <https://github.com/lhotse-speech/lhotse>
108
  """
109
 
 
 
 
 
 
 
 
 
 
110
 
111
  def update_model_dropdown(language: str):
112
  if language in language_to_models:
@@ -116,7 +197,7 @@ def update_model_dropdown(language: str):
116
  raise ValueError(f"Unsupported language: {language}")
117
 
118
 
119
- demo = gr.Blocks()
120
 
121
  with demo:
122
  gr.Markdown(title)
@@ -162,6 +243,7 @@ with demo:
162
  )
163
  upload_button = gr.Button("Submit for recognition")
164
  uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
 
165
 
166
  with gr.TabItem("Record from microphone"):
167
  microphone = gr.Audio(
@@ -173,9 +255,10 @@ with demo:
173
 
174
  record_button = gr.Button("Submit for recognition")
175
  recorded_output = gr.Textbox(label="Recognized speech from recordings")
 
176
 
177
  upload_button.click(
178
- process,
179
  inputs=[
180
  uploaded_file,
181
  language_radio,
@@ -183,10 +266,10 @@ with demo:
183
  decoding_method_radio,
184
  num_active_paths_slider,
185
  ],
186
- outputs=uploaded_output,
187
  )
188
  record_button.click(
189
- process,
190
  inputs=[
191
  microphone,
192
  language_radio,
@@ -194,7 +277,7 @@ with demo:
194
  decoding_method_radio,
195
  num_active_paths_slider,
196
  ],
197
- outputs=recorded_output,
198
  )
199
  gr.Markdown(description)
200
 
 
40
  return out_filename
41
 
42
 
43
def build_html_output(
    s: str,
    style: str = "result_item_success",
    escape: bool = False,
) -> str:
    """Wrap a message in the result ``<div>`` markup shown in the HTML panel.

    Args:
      s:
        The message to display. By default it is inserted verbatim, so it may
        contain HTML markup such as ``<br/>``.
      style:
        Extra CSS class added next to ``result_item``; the file's stylesheet
        defines ``result_item_success`` and ``result_item_error``.
      escape:
        If True, HTML-escape ``s`` before embedding it. Use this for text
        that is not trusted markup (for example an exception message).
        Defaults to False so existing callers that pass markup are unaffected.

    Returns:
      An HTML fragment containing the message.
    """
    if escape:
        # Local import: the file's import block is not touched by this change.
        import html

        s = html.escape(s)

    return f"""
    <div class='result'>
        <div class='result_item {style}'>
          {s}
        </div>
    </div>
    """
51
+
52
+
53
def process_uploaded_file(
    in_filename: str,
    language: str,
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
):
    """Run recognition on an uploaded file and report the outcome as HTML.

    Args:
      in_filename:
        Path of the uploaded file; ``None`` or an empty string means that
        nothing was uploaded.
      language, repo_id, decoding_method, num_active_paths:
        Forwarded unchanged to ``process``.

    Returns:
      A pair ``(text, html)``. On success this is whatever ``process``
      returns. On failure ``text`` is the empty string and ``html`` is an
      error box built with ``build_html_output``.
    """
    # Local import: the file's import block is not touched by this change.
    import html

    if not in_filename:
        return "", build_html_output(
            "Please first upload a file and then click "
            'the button "submit for recognition"',
            "result_item_error",
        )

    logging.info(f"Processing uploaded file: {in_filename}")
    try:
        return process(
            in_filename=in_filename,
            language=language,
            repo_id=repo_id,
            decoding_method=decoding_method,
            num_active_paths=num_active_paths,
        )
    except Exception as e:
        # UI boundary: any failure is shown to the user instead of crashing
        # the callback. Keep the traceback in the log for debugging.
        logging.exception(f"Failed to process uploaded file: {in_filename}")
        # The message is rendered as HTML, so escape it: it is plain text and
        # may contain characters such as '<' or '&'.
        return "", build_html_output(html.escape(str(e)), "result_item_error")
79
+
80
+
81
def process_microphone(
    in_filename: str,
    language: str,
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
):
    """Run recognition on a microphone recording and report the outcome as HTML.

    Args:
      in_filename:
        Path of the recorded audio; ``None`` or an empty string means that
        nothing was recorded.
      language, repo_id, decoding_method, num_active_paths:
        Forwarded unchanged to ``process``.

    Returns:
      A pair ``(text, html)``. On success this is whatever ``process``
      returns. On failure ``text`` is the empty string and ``html`` is an
      error box built with ``build_html_output``.
    """
    # Local import: the file's import block is not touched by this change.
    import html

    if not in_filename:
        return "", build_html_output(
            "Please first click 'Record from microphone', speak, "
            "click 'Stop recording', and then "
            "click the button 'submit for recognition'",
            "result_item_error",
        )

    logging.info(f"Processing microphone: {in_filename}")
    try:
        return process(
            in_filename=in_filename,
            language=language,
            repo_id=repo_id,
            decoding_method=decoding_method,
            num_active_paths=num_active_paths,
        )
    except Exception as e:
        # UI boundary: any failure is shown to the user instead of crashing
        # the callback. Keep the traceback in the log for debugging.
        logging.exception(f"Failed to process microphone input: {in_filename}")
        # The message is rendered as HTML, so escape it: it is plain text and
        # may contain characters such as '<' or '&'.
        return "", build_html_output(html.escape(str(e)), "result_item_error")
108
+
109
+
110
  def process(
111
  in_filename: str,
112
  language: str,
113
  repo_id: str,
114
  decoding_method: str,
115
  num_active_paths: int,
116
+ ):
117
  logging.info(f"in_filename: {in_filename}")
118
  logging.info(f"language: {language}")
119
  logging.info(f"repo_id: {repo_id}")
 
155
  rtf = (end - start) / duration
156
 
157
  logging.info(f"Finished at {date_time} s. Elapsed: {end - start: .3f} s")
158
+
159
+ info = f"""
160
+ Wave duration : {duration: .3f} s <br/>
161
+ Processing time: {end - start: .3f} s <br/>
162
+ RTF: {end - start: .3f}/{duration: .3f} = {(end - start)/duration:.3f} <br/>
163
+ """
164
+ logging.info(info)
165
  logging.info(f"hyp:\n{hyp}")
166
 
167
+ return hyp, build_html_output(info)
168
 
169
 
170
  title = "# Automatic Speech Recognition with Next-gen Kaldi"
 
179
  - <https://github.com/lhotse-speech/lhotse>
180
  """
181
 
182
+ # css style is copied from
183
+ # https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L113
184
+ css = """
185
+ .result {display:flex;flex-direction:column}
186
+ .result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
187
+ .result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
188
+ .result_item_error {background-color:#ff7070;color:white;align-self:start}
189
+ """
190
+
191
 
192
  def update_model_dropdown(language: str):
193
  if language in language_to_models:
 
197
  raise ValueError(f"Unsupported language: {language}")
198
 
199
 
200
+ demo = gr.Blocks(css=css)
201
 
202
  with demo:
203
  gr.Markdown(title)
 
243
  )
244
  upload_button = gr.Button("Submit for recognition")
245
  uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
246
+ uploaded_html_info = gr.HTML(label="Info")
247
 
248
  with gr.TabItem("Record from microphone"):
249
  microphone = gr.Audio(
 
255
 
256
  record_button = gr.Button("Submit for recognition")
257
  recorded_output = gr.Textbox(label="Recognized speech from recordings")
258
+ recorded_html_info = gr.HTML(label="Info")
259
 
260
  upload_button.click(
261
+ process_uploaded_file,
262
  inputs=[
263
  uploaded_file,
264
  language_radio,
 
266
  decoding_method_radio,
267
  num_active_paths_slider,
268
  ],
269
+ outputs=[uploaded_output, uploaded_html_info],
270
  )
271
  record_button.click(
272
+ process_microphone,
273
  inputs=[
274
  microphone,
275
  language_radio,
 
277
  decoding_method_radio,
278
  num_active_paths_slider,
279
  ],
280
+ outputs=[recorded_output, recorded_html_info],
281
  )
282
  gr.Markdown(description)
283