Spaces:

Steveeeeeeen
/

ASR-comparaison

Running

Steveeeeeeen HF staff committed on Aug 13, 2024

Commit

347882e

verified ·

1 Parent(s): b293ec6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import numpy as np
 import gradio as gr
 import json
 from pathlib import Path
 # Load WER metric
@@ -19,7 +20,10 @@ model_name = {
 # open ds_data.json
 with open("ds_data.json", "r") as f:
     table_data = json.load(f)
 def compute_wer_table(audio, text):
     # Convert the wav into an array
     audio_input = audio[1]
@@ -34,6 +38,7 @@ def compute_wer_table(audio, text):
         pipe = pipeline("automatic-speech-recognition", model=model_name[model])
         transcription = pipe(audio_input)['text']
         # transcription = transcription.translate(remove_chars)
         trans.append(transcription)
         wer = wer_metric.compute(predictions=[transcription.upper()], references=[text.upper()])
         wer_scores.append(wer)

 import gradio as gr
 import json
 from pathlib import Path
+import re
 # Load WER metric
 # open ds_data.json
 with open("ds_data.json", "r") as f:
     table_data = json.load(f)
+def clean_text(text):
+    return re.sub(r'[.,!?]', '', text)
 def compute_wer_table(audio, text):
     # Convert the wav into an array
     audio_input = audio[1]
         pipe = pipeline("automatic-speech-recognition", model=model_name[model])
         transcription = pipe(audio_input)['text']
         # transcription = transcription.translate(remove_chars)
+        transcription = clean_text(transcription)
         trans.append(transcription)
         wer = wer_metric.compute(predictions=[transcription.upper()], references=[text.upper()])
         wer_scores.append(wer)