ZipNN
Browse files
README.md
CHANGED
@@ -81,7 +81,7 @@ from zipnn import zipnn_hf
|
|
81 |
zipnn_hf()
|
82 |
|
83 |
|
84 |
-
pipe = pipeline("automatic-speech-recognition", model="royleibov/wav2vec2-large-xlsr-53-english")
|
85 |
```
|
86 |
```python
|
87 |
# Load model directly
|
@@ -90,22 +90,22 @@ from zipnn import zipnn_hf
|
|
90 |
|
91 |
zipnn_hf()
|
92 |
|
93 |
-
processor = AutoProcessor.from_pretrained("royleibov/wav2vec2-large-xlsr-53-english")
|
94 |
-
model = AutoModelForCTC.from_pretrained("royleibov/wav2vec2-large-xlsr-53-english")
|
95 |
```
|
96 |
### ZipNN
|
97 |
ZipNN also allows you to seamlessly save local disk space in your cache after the model is downloaded.
|
98 |
|
99 |
To compress the cached model, simply run:
|
100 |
```bash
|
101 |
-
python zipnn_compress_path.py safetensors --model royleibov/
|
102 |
```
|
103 |
|
104 |
The model will be decompressed automatically and safely as long as `zipnn_hf()` is added at the top of the file like in the [example above](#use-this-model).
|
105 |
|
106 |
To decompress manually, simply run:
|
107 |
```bash
|
108 |
-
python zipnn_decompress_path.py --model royleibov/
|
109 |
```
|
110 |
|
111 |
# Fine-tuned XLSR-53 large model for speech recognition in English
|
@@ -125,8 +125,11 @@ Using the [HuggingSound](https://github.com/jonatasgrosman/huggingsound) library
|
|
125 |
|
126 |
```python
|
127 |
from huggingsound import SpeechRecognitionModel
|
|
|
|
|
|
|
128 |
|
129 |
-
model = SpeechRecognitionModel("
|
130 |
audio_paths = ["/path/to/file.mp3", "/path/to/another_file.wav"]
|
131 |
|
132 |
transcriptions = model.transcribe(audio_paths)
|
@@ -139,9 +142,12 @@ import torch
|
|
139 |
import librosa
|
140 |
from datasets import load_dataset
|
141 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
|
|
|
|
|
142 |
|
143 |
LANG_ID = "en"
|
144 |
-
MODEL_ID = "
|
145 |
SAMPLES = 10
|
146 |
|
147 |
test_dataset = load_dataset("common_voice", LANG_ID, split=f"test[:{SAMPLES}]")
|
@@ -190,13 +196,13 @@ for i, predicted_sentence in enumerate(predicted_sentences):
|
|
190 |
1. To evaluate on `mozilla-foundation/common_voice_6_0` with split `test`
|
191 |
|
192 |
```bash
|
193 |
-
python eval.py --model_id
|
194 |
```
|
195 |
|
196 |
2. To evaluate on `speech-recognition-community-v2/dev_data`
|
197 |
|
198 |
```bash
|
199 |
-
python eval.py --model_id
|
200 |
```
|
201 |
|
202 |
## Citation
|
|
|
81 |
zipnn_hf()
|
82 |
|
83 |
|
84 |
+
pipe = pipeline("automatic-speech-recognition", model="royleibov/wav2vec2-large-xlsr-53-english-ZipNN-Compressed")
|
85 |
```
|
86 |
```python
|
87 |
# Load model directly
|
|
|
90 |
|
91 |
zipnn_hf()
|
92 |
|
93 |
+
processor = AutoProcessor.from_pretrained("royleibov/wav2vec2-large-xlsr-53-english-ZipNN-Compressed")
|
94 |
+
model = AutoModelForCTC.from_pretrained("royleibov/wav2vec2-large-xlsr-53-english-ZipNN-Compressed")
|
95 |
```
|
96 |
### ZipNN
|
97 |
ZipNN also allows you to seamlessly save local disk space in your cache after the model is downloaded.
|
98 |
|
99 |
To compress the cached model, simply run:
|
100 |
```bash
|
101 |
+
python zipnn_compress_path.py safetensors --model royleibov/wav2vec2-large-xlsr-53-english-ZipNN-Compressed --hf_cache
|
102 |
```
|
103 |
|
104 |
The model will be decompressed automatically and safely as long as `zipnn_hf()` is added at the top of the file like in the [example above](#use-this-model).
|
105 |
|
106 |
To decompress manually, simply run:
|
107 |
```bash
|
108 |
+
python zipnn_decompress_path.py --model royleibov/wav2vec2-large-xlsr-53-english-ZipNN-Compressed --hf_cache
|
109 |
```
|
110 |
|
111 |
# Fine-tuned XLSR-53 large model for speech recognition in English
|
|
|
125 |
|
126 |
```python
|
127 |
from huggingsound import SpeechRecognitionModel
|
128 |
+
from zipnn import zipnn_hf
|
129 |
+
|
130 |
+
zipnn_hf()
|
131 |
|
132 |
+
model = SpeechRecognitionModel("royleibov/wav2vec2-large-xlsr-53-english-ZipNN-Compressed")
|
133 |
audio_paths = ["/path/to/file.mp3", "/path/to/another_file.wav"]
|
134 |
|
135 |
transcriptions = model.transcribe(audio_paths)
|
|
|
142 |
import librosa
|
143 |
from datasets import load_dataset
|
144 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
145 |
+
from zipnn import zipnn_hf
|
146 |
+
|
147 |
+
zipnn_hf()
|
148 |
|
149 |
LANG_ID = "en"
|
150 |
+
MODEL_ID = "royleibov/wav2vec2-large-xlsr-53-english-ZipNN-Compressed"
|
151 |
SAMPLES = 10
|
152 |
|
153 |
test_dataset = load_dataset("common_voice", LANG_ID, split=f"test[:{SAMPLES}]")
|
|
|
196 |
1. To evaluate on `mozilla-foundation/common_voice_6_0` with split `test`
|
197 |
|
198 |
```bash
|
199 |
+
python eval.py --model_id royleibov/wav2vec2-large-xlsr-53-english-ZipNN-Compressed --dataset mozilla-foundation/common_voice_6_0 --config en --split test
|
200 |
```
|
201 |
|
202 |
2. To evaluate on `speech-recognition-community-v2/dev_data`
|
203 |
|
204 |
```bash
|
205 |
+
python eval.py --model_id royleibov/wav2vec2-large-xlsr-53-english-ZipNN-Compressed --dataset speech-recognition-community-v2/dev_data --config en --split validation --chunk_length_s 5.0 --stride_length_s 1.0
|
206 |
```
|
207 |
|
208 |
## Citation
|