Update README.md
README.md
CHANGED
@@ -13,6 +13,9 @@ tags:
 - common_voice
 - SberDevices/Golos
 license: apache-2.0
+widget:
+- example_title: test Russian speech "нейросети это хорошо" (in English, "neural networks are good")
+  src: https://huggingface.co/bond005/wav2vec2-large-ru-golos/resolve/main/test_sound_ru.flac
 model-index:
 - name: XLSR Wav2Vec2 Russian with Language Model by Ivan Bondarenko
   results:
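The widget entry added above points the hosted inference widget at `test_sound_ru.flac`. Outside the Hub, the same file can be transcribed with the generic 🤗 Transformers ASR pipeline. The snippet below is an illustrative sketch, not part of this commit; it assumes `pyctcdecode` and `kenlm` are installed for the LM-boosted decoding and `ffmpeg` is available for reading the remote FLAC file.

```python
# Illustrative sketch (not part of this commit): run the widget's example audio
# through the ASR pipeline locally.
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="bond005/wav2vec2-large-ru-golos-with-lm",
)
result = asr(
    "https://huggingface.co/bond005/wav2vec2-large-ru-golos/resolve/main/test_sound_ru.flac"
)
print(result["text"])  # expected, per the widget title: "нейросети это хорошо"
```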
@@ -26,10 +29,10 @@ model-index:
     metrics:
     - name: Test WER
       type: wer
-      value: 4.
+      value: 4.272
     - name: Test CER
       type: cer
-      value:
+      value: 0.983
   - task:
       name: Speech Recognition
       type: automatic-speech-recognition
@@ -40,10 +43,10 @@ model-index:
     metrics:
     - name: Test WER
       type: wer
-      value:
+      value: 11.405
     - name: Test CER
       type: cer
-      value:
+      value: 3.628
   - task:
       name: Automatic Speech Recognition
       type: automatic-speech-recognition
@@ -54,10 +57,10 @@ model-index:
     metrics:
     - name: Test WER
       type: wer
-      value:
+      value: 19.053
     - name: Test CER
      type: cer
-      value:
+      value: 4.876
 ---
 # Wav2Vec2-Large-Ru-Golos-With-LM
 
@@ -65,12 +68,9 @@ The Wav2Vec2 model is based on [facebook/wav2vec2-large-xlsr-53](https://hugging
 
 The 2-gram language model is built on the Russian text corpus obtained from six open sources:
 
-- random
-
-
-- text annotations from the training part of [Sberdevices Golos](https://huggingface.co/datasets/SberDevices/Golos)
-- text annotations from the training and validation parts of [CommonVoice 6.0 Ru](https://huggingface.co/datasets/common_voice)
-- text annotations from [SOVA Dataset](https://github.com/sovaai/sova-dataset) (RuDevices and RuAudiobooks only, i.e. without RuYoutube).
+- random 10% subset of [Taiga](https://tatianashavrina.github.io/taiga_site)
+- [Russian Wikipedia](https://ru.wikipedia.org)
+- [Russian Wikinews](https://ru.wikinews.org).
 
 ## Usage
 
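For background (not shown in this commit): a 2-gram model over such a corpus is typically built with KenLM and attached to the acoustic model through `pyctcdecode`, roughly as sketched below. The base checkpoint, the corpus/ARPA file names, and the vocabulary handling are illustrative assumptions, not the author's actual build script.

```python
# Hypothetical sketch: attach a KenLM 2-gram model (e.g. built beforehand with
# `lmplz -o 2 < normalized_corpus.txt > lm_2gram.arpa`) to the CTC model.
from pyctcdecode import build_ctcdecoder
from transformers import Wav2Vec2Processor, Wav2Vec2ProcessorWithLM

# Assumed base checkpoint without the language model.
base_processor = Wav2Vec2Processor.from_pretrained("bond005/wav2vec2-large-ru-golos")

# The decoder's labels must follow the CTC tokenizer's vocabulary order.
vocab = base_processor.tokenizer.get_vocab()
labels = [token for token, _ in sorted(vocab.items(), key=lambda item: item[1])]

decoder = build_ctcdecoder(labels, kenlm_model_path="lm_2gram.arpa")  # assumed path
processor_with_lm = Wav2Vec2ProcessorWithLM(
    feature_extractor=base_processor.feature_extractor,
    tokenizer=base_processor.tokenizer,
    decoder=decoder,
)
processor_with_lm.save_pretrained("wav2vec2-large-ru-golos-with-lm")
```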
@@ -90,36 +90,34 @@ import torch
 from datasets import load_dataset
 from transformers import Wav2Vec2ForCTC, Wav2Vec2ProcessorWithLM
 
-LANG_ID = "ru"
 MODEL_ID = "bond005/wav2vec2-large-ru-golos-with-lm"
+DATASET_ID = "bond005/sberdevices_golos_10h_crowd"
 SAMPLES = 20
 
 nltk.download('punkt')
 num_processes = max(1, os.cpu_count())
 
-test_dataset = load_dataset(
-    "common_voice",
-    LANG_ID, split=f"test[:{SAMPLES}]"
-)
+test_dataset = load_dataset(DATASET_ID, split=f"test[:{SAMPLES}]")
 processor = Wav2Vec2ProcessorWithLM.from_pretrained(MODEL_ID)
 model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
 
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
-    speech_array
-    prepared_sentence = ' '.join(list(filter(
-        lambda it: it.isalpha(),
-        nltk.wordpunct_tokenize(batch["sentence"].lower().replace('ё', 'е'))
-    )))
+    speech_array = batch["audio"]["array"]
     batch["speech"] = np.asarray(speech_array, dtype=np.float32)
-    batch["sentence"] = prepared_sentence
     return batch
 
+removed_columns = set(test_dataset.column_names)
+removed_columns -= {'transcription', 'speech'}
+removed_columns = sorted(list(removed_columns))
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
-    test_dataset = test_dataset.map(
-
+    test_dataset = test_dataset.map(
+        speech_file_to_array_fn,
+        num_proc=num_processes,
+        remove_columns=removed_columns
+    )
 
 inputs = processor(test_dataset["speech"], sampling_rate=16_000,
                    return_tensors="pt", padding=True)
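The forward pass and the LM-boosted decoding sit between this hunk and the next one and are unchanged by the commit, so the diff does not show them. For orientation only, that part looks roughly like the sketch below, reusing the `model`, `processor`, `inputs`, and `num_processes` defined in the script above; the exact unchanged lines may differ.

```python
# Rough sketch of the unchanged middle of the usage script (not taken verbatim from it):
# CTC forward pass, then beam-search decoding with the attached 2-gram LM.
with torch.no_grad():
    logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    predicted_sentences = processor.batch_decode(
        logits=logits.numpy(),
        num_processes=num_processes
    ).text
```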
@@ -135,71 +133,71 @@ with warnings.catch_warnings():
     warnings.simplefilter("ignore")
     for i, predicted_sentence in enumerate(predicted_sentences):
         print("-" * 100)
-        print("Reference:", test_dataset[i]["
+        print("Reference:", test_dataset[i]["transcription"])
         print("Prediction:", predicted_sentence)
 ```
 
 ```text
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: шестьдесят тысяч тенге сколько будет стоить
+Prediction: шестьдесят тысяч тенге сколько будет стоить
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: покажи мне на смотрешке телеканал синергия тв
+Prediction: покажи мне на смотрешке телеканал синергия тв
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: заказать яблоки зеленые
+Prediction: заказать яблоки зеленые
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: алиса закажи килограммовый торт графские развалины
+Prediction: алиса закажи килограммовый торт графские развалины
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: ищи телеканал про бизнес на тиви
+Prediction: ищи телеканал про бизнес на тви
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: михаила мурадяна
+Prediction: михаила мурадяна
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: любовницы две тысячи тринадцать пятнадцатый сезон
+Prediction: любовница две тысячи тринадцать пятнадцатый сезон
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: найди боевики
+Prediction: найди боевики
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: гетто сезон три
+Prediction: гетта сезон три
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: хочу посмотреть ростов папа на телевизоре
+Prediction: хочу посмотреть ростов папа на телевизоре
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: сбер какое твое самое ненавистное занятие
+Prediction: сбер какое твое самое ненавистное занятие
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: афина чем платят у китайцев
+Prediction: афина чем платят у китайцев
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: джой как работает досрочное погашение кредита
+Prediction: джой как работает досрочное погашение кредита
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: у тебя найдется люк кейдж
+Prediction: у тебя найдется люк кейдж
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: у тебя будет лучшая часть пинк
+Prediction: у тебя будет лучшая часть пинк
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: пожалуйста пополните мне счет
+Prediction: пожалуйста пополните мне счет
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: анне павловне шабуровой
+Prediction: анне павловне шабуровой
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: врубай на смотрешке муз тв
+Prediction: врубай на смотрешке муз тиви
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: найди на смотрешке лдпр тв
+Prediction: найди на смотрешке лдпр тв
 ----------------------------------------------------------------------------------------------------
-Reference:
-Prediction:
+Reference: сбер мне нужен педикюр забей мне место
+Prediction: сбер мне нужен педикюр забелье место
 ```
 
 
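The WER and CER values in the model-index at the top of the diff are measured over the full test splits; from reference/prediction pairs like the ones above they can be reproduced with the 🤗 `evaluate` package. The snippet below is a minimal scoring sketch, not part of the commit; it reuses the `test_dataset` and `predicted_sentences` variables from the usage script, so on the 20-sample excerpt the numbers will differ from the full-split values.

```python
# Minimal scoring sketch (not part of this commit): word and character error rate
# over the decoded samples, using the references from the "transcription" column.
import evaluate

references = test_dataset["transcription"]

wer = evaluate.load("wer").compute(predictions=predicted_sentences, references=references)
cer = evaluate.load("cer").compute(predictions=predicted_sentences, references=references)
print(f"WER = {100.0 * wer:.3f}%, CER = {100.0 * cer:.3f}%")
```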