csukuangfj
committed on
Commit
·
27c18ec
1
Parent(s):
f5b3bb8
add whisper
Browse files
app.py
CHANGED
@@ -19,7 +19,6 @@
|
|
19 |
# References:
|
20 |
# https://gradio.app/docs/#dropdown
|
21 |
|
22 |
-
import base64
|
23 |
import logging
|
24 |
import os
|
25 |
import tempfile
|
@@ -47,11 +46,6 @@ def convert_to_wav(in_filename: str) -> str:
|
|
47 |
f"ffmpeg -hide_banner -loglevel error -i '{in_filename}' -ar 16000 '{out_filename}.flac'"
|
48 |
)
|
49 |
|
50 |
-
with open(out_filename + ".flac", "rb") as f:
|
51 |
-
s = "\n" + out_filename + "\n"
|
52 |
-
s += base64.b64encode(f.read()).decode()
|
53 |
-
logging.info(s)
|
54 |
-
|
55 |
return out_filename
|
56 |
|
57 |
|
|
|
19 |
# References:
|
20 |
# https://gradio.app/docs/#dropdown
|
21 |
|
|
|
22 |
import logging
|
23 |
import os
|
24 |
import tempfile
|
|
|
46 |
f"ffmpeg -hide_banner -loglevel error -i '{in_filename}' -ar 16000 '{out_filename}.flac'"
|
47 |
)
|
48 |
|
|
|
|
|
|
|
|
|
|
|
49 |
return out_filename
|
50 |
|
51 |
|
model.py
CHANGED
@@ -269,6 +269,39 @@ def _get_aishell2_pretrained_model(
|
|
269 |
return recognizer
|
270 |
|
271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
@lru_cache(maxsize=10)
|
273 |
def _get_gigaspeech_pre_trained_model(
|
274 |
repo_id: str,
|
@@ -839,6 +872,10 @@ chinese_models = {
|
|
839 |
}
|
840 |
|
841 |
english_models = {
|
|
|
|
|
|
|
|
|
842 |
"wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
|
843 |
"yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04": _get_english_model, # noqa
|
844 |
"yfyeung/icefall-asr-finetune-mux-pruned_transducer_stateless7-2023-05-19": _get_english_model, # noqa
|
|
|
269 |
return recognizer
|
270 |
|
271 |
|
272 |
+
@lru_cache(maxsize=10)
def _get_whisper_model(
    repo_id: str, decoding_method: str, num_active_paths: int
) -> sherpa_onnx.OfflineRecognizer:
    """Create an offline recognizer for one of the English Whisper models.

    Args:
      repo_id:
        The model name, either bare ("tiny.en", "base.en", "small.en",
        "medium.en") or with the "whisper-" prefix used by the keys of
        ``english_models`` (e.g. "whisper-tiny.en").
      decoding_method:
        Not used by this function; accepted so the signature matches the
        other model getters stored in the model registries.
      num_active_paths:
        Not used by this function; see ``decoding_method``.

    Returns:
      The recognizer built by ``sherpa_onnx.OfflineRecognizer.from_whisper``.

    Raises:
      AssertionError: if the model name is not one of the supported names.
    """
    # The registry keys look like "whisper-<name>", while the file names in
    # the model repository use only "<name>". Accept both spellings.
    # NOTE(review): presumably the registry key is what gets passed in as
    # repo_id -- confirm against the caller.
    prefix = "whisper-"
    name = repo_id[len(prefix):] if repo_id.startswith(prefix) else repo_id
    assert name in ("tiny.en", "base.en", "small.en", "medium.en"), repo_id

    full_repo_id = "csukuangfj/sherpa-onnx-whisper-" + name

    encoder = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename=f"{name}-encoder.int8.ort",
        subfolder=".",
    )

    decoder = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename=f"{name}-decoder.int8.ort",
        subfolder=".",
    )

    tokens = _get_token_filename(
        repo_id=full_repo_id, subfolder=".", filename=f"{name}-tokens.txt"
    )

    recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
        encoder=encoder,
        decoder=decoder,
        tokens=tokens,
        num_threads=2,
    )

    return recognizer
|
303 |
+
|
304 |
+
|
305 |
@lru_cache(maxsize=10)
|
306 |
def _get_gigaspeech_pre_trained_model(
|
307 |
repo_id: str,
|
|
|
872 |
}
|
873 |
|
874 |
english_models = {
|
875 |
+
"whisper-tiny.en": _get_whisper_model,
|
876 |
+
"whisper-base.en": _get_whisper_model,
|
877 |
+
"whisper-small.en": _get_whisper_model,
|
878 |
+
"whisper-medium.en": _get_whisper_model,
|
879 |
"wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
|
880 |
"yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04": _get_english_model, # noqa
|
881 |
"yfyeung/icefall-asr-finetune-mux-pruned_transducer_stateless7-2023-05-19": _get_english_model, # noqa
|