Steven Zheng committed on
Commit adae6d0 · 1 Parent(s): 34fb163

Explanations of demo.md

Files changed (1): demo.md (+0, -52)
demo.md CHANGED
@@ -65,58 +65,6 @@ def evaluate_model(ds, pipe, wer_metric):
 # Load WER metric
 wer_metric = evaluate.load("wer")
 
-results = {}
-model_wer_results = {}
-# Evaluate model
-for model in model_name:
-    pipe = pipeline("automatic-speech-recognition", model=model_name[model])
-    wer_scores, wer_results = evaluate_model(ds, pipe, wer_metric)
-    results[model] = np.mean(wer_scores)
-    model_wer_results[model] = wer_results
-
-for model in results:
-    print(f"Model: {model}, WER: {results[model]}")
-```
-
-## Whole script
-
-```python
-from datasets import load_dataset
-from transformers import pipeline
-import evaluate
-import numpy as np
-from tqdm import tqdm
-
-ds = load_dataset("openslr/librispeech_asr", "clean", split="validation", streaming=True)
-ds = ds.take(100)
-
-model_name = {
-    "whisper-tiny": "openai/whisper-tiny.en",
-    "wav2vec2-large-960h": "facebook/wav2vec2-base-960h",
-    "distill-whisper-small": "distil-whisper/distil-small.en",
-}
-
-def evaluate_model(ds, pipe, wer_metric):
-    wer_scores = []
-    wer_results = []
-    for idx, sample in enumerate(tqdm(ds, desc="Evaluating", total=len(list(ds)))):
-        audio_sample = sample["audio"]
-        transcription = pipe(audio_sample["array"])['text']
-        # Remove , and . from the transcription
-        transcription = transcription.replace(",", "").replace(".", "")
-        wer = wer_metric.compute(predictions=[transcription.upper()], references=[sample["text"].upper()])
-        wer_scores.append(wer)
-        wer_results.append({
-            "index": idx,
-            "transcription": transcription.upper(),
-            "reference": sample["text"].upper(),
-            "wer": wer
-        })
-    return wer_scores, wer_results
-
-# Load WER metric
-wer_metric = evaluate.load("wer")
-
 results = {}
 model_wer_results = {}
 # Evaluate model
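For context: this hunk removes a duplicated copy of the evaluation script from demo.md, and the code it retains scores ASR transcriptions with the `evaluate` library's WER metric, comparing upper-cased, punctuation-stripped hypotheses against LibriSpeech references. A minimal, self-contained sketch of that metric call (the toy strings below are illustrative, not taken from the dataset):

```python
import evaluate

# Load the WER metric, as demo.md does
wer_metric = evaluate.load("wer")

# Toy hypothesis/reference pair (illustrative, not from LibriSpeech)
prediction = "HELLO WORLD THIS IS A TEST"
reference = "HELLO WORLD THIS WAS A TEST"

# WER = (substitutions + insertions + deletions) / reference word count
wer = wer_metric.compute(predictions=[prediction], references=[reference])
print(f"WER: {wer:.3f}")  # 1 substitution over 6 reference words -> 0.167
```

One aside on the script itself: `total=len(list(ds))` in the progress bar materializes the streaming split once just to count it, so the 100 samples end up being streamed twice per model; passing `total=100` directly would avoid the extra pass.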
 