Spaces:
Running
Running
Steven Zheng
committed on
Commit
·
adae6d0
1
Parent(s):
34fb163
Explanations of demo.md
Browse files
demo.md
CHANGED
@@ -65,58 +65,6 @@ def evaluate_model(ds, pipe, wer_metric):
|
|
65 |
# Load WER metric
|
66 |
wer_metric = evaluate.load("wer")
|
67 |
|
68 |
-
results = {}
|
69 |
-
model_wer_results = {}
|
70 |
-
# Evaluate model
|
71 |
-
for model in model_name:
|
72 |
-
pipe = pipeline("automatic-speech-recognition", model=model_name[model])
|
73 |
-
wer_scores, wer_results = evaluate_model(ds, pipe, wer_metric)
|
74 |
-
results[model] = np.mean(wer_scores)
|
75 |
-
model_wer_results[model] = wer_results
|
76 |
-
|
77 |
-
for model in results:
|
78 |
-
print(f"Model: {model}, WER: {results[model]}")
|
79 |
-
```
|
80 |
-
|
81 |
-
## Whole script
|
82 |
-
|
83 |
-
```python
|
84 |
-
from datasets import load_dataset
|
85 |
-
from transformers import pipeline
|
86 |
-
import evaluate
|
87 |
-
import numpy as np
|
88 |
-
from tqdm import tqdm
|
89 |
-
|
90 |
-
ds = load_dataset("openslr/librispeech_asr", "clean", split="validation", streaming=True)
|
91 |
-
ds = ds.take(100)
|
92 |
-
|
93 |
-
model_name = {
|
94 |
-
"whisper-tiny": "openai/whisper-tiny.en",
|
95 |
-
"wav2vec2-large-960h": "facebook/wav2vec2-base-960h",
|
96 |
-
"distill-whisper-small": "distil-whisper/distil-small.en",
|
97 |
-
}
|
98 |
-
|
99 |
-
def evaluate_model(ds, pipe, wer_metric):
|
100 |
-
wer_scores = []
|
101 |
-
wer_results = []
|
102 |
-
for idx, sample in enumerate(tqdm(ds, desc="Evaluating", total=len(list(ds)))):
|
103 |
-
audio_sample = sample["audio"]
|
104 |
-
transcription = pipe(audio_sample["array"])['text']
|
105 |
-
# Remove , and . from the transcription
|
106 |
-
transcription = transcription.replace(",", "").replace(".", "")
|
107 |
-
wer = wer_metric.compute(predictions=[transcription.upper()], references=[sample["text"].upper()])
|
108 |
-
wer_scores.append(wer)
|
109 |
-
wer_results.append({
|
110 |
-
"index": idx,
|
111 |
-
"transcription": transcription.upper(),
|
112 |
-
"reference": sample["text"].upper(),
|
113 |
-
"wer": wer
|
114 |
-
})
|
115 |
-
return wer_scores, wer_results
|
116 |
-
|
117 |
-
# Load WER metric
|
118 |
-
wer_metric = evaluate.load("wer")
|
119 |
-
|
120 |
results = {}
|
121 |
model_wer_results = {}
|
122 |
# Evaluate model
|
|
|
65 |
# Load WER metric
|
66 |
wer_metric = evaluate.load("wer")
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
results = {}
|
69 |
model_wer_results = {}
|
70 |
# Evaluate model
|