Steveeeeeeen HF staff committed on
Commit
347882e
1 Parent(s): b293ec6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -5,6 +5,7 @@ import numpy as np
5
  import gradio as gr
6
  import json
7
  from pathlib import Path
 
8
 
9
 
10
  # Load WER metric
@@ -19,7 +20,10 @@ model_name = {
19
  # open ds_data.json
20
  with open("ds_data.json", "r") as f:
21
  table_data = json.load(f)
22
-
 
 
 
23
  def compute_wer_table(audio, text):
24
  # Convert the wav into an array
25
  audio_input = audio[1]
@@ -34,6 +38,7 @@ def compute_wer_table(audio, text):
34
  pipe = pipeline("automatic-speech-recognition", model=model_name[model])
35
  transcription = pipe(audio_input)['text']
36
  # transcription = transcription.translate(remove_chars)
 
37
  trans.append(transcription)
38
  wer = wer_metric.compute(predictions=[transcription.upper()], references=[text.upper()])
39
  wer_scores.append(wer)
 
5
  import gradio as gr
6
  import json
7
  from pathlib import Path
8
+ import re
9
 
10
 
11
  # Load WER metric
 
20
  # open ds_data.json
21
  with open("ds_data.json", "r") as f:
22
  table_data = json.load(f)
23
+
24
+ def clean_text(text):
25
+ return re.sub(r'[.,!?]', '', text)
26
+
27
  def compute_wer_table(audio, text):
28
  # Convert the wav into an array
29
  audio_input = audio[1]
 
38
  pipe = pipeline("automatic-speech-recognition", model=model_name[model])
39
  transcription = pipe(audio_input)['text']
40
  # transcription = transcription.translate(remove_chars)
41
+ transcription = clean_text(transcription)
42
  trans.append(transcription)
43
  wer = wer_metric.compute(predictions=[transcription.upper()], references=[text.upper()])
44
  wer_scores.append(wer)