csukuangfj
committed on
Commit
·
ebfb900
1
Parent(s):
aa8a2cf
Add moonshine
Browse files
model.py
CHANGED
@@ -516,6 +516,61 @@ def _get_russian_pre_trained_model(
|
|
516 |
return recognizer
|
517 |
|
518 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
519 |
@lru_cache(maxsize=10)
|
520 |
def _get_whisper_model(
|
521 |
repo_id: str, decoding_method: str, num_active_paths: int
|
@@ -1618,6 +1673,8 @@ english_models = {
|
|
1618 |
"whisper-tiny.en": _get_whisper_model,
|
1619 |
"whisper-base.en": _get_whisper_model,
|
1620 |
"whisper-small.en": _get_whisper_model,
|
|
|
|
|
1621 |
"csukuangfj/sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000": _get_sherpa_onnx_nemo_ctc_models,
|
1622 |
"csukuangfj/sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000": _get_sherpa_onnx_nemo_transducer_models,
|
1623 |
# "whisper-medium.en": _get_whisper_model,
|
|
|
516 |
return recognizer
|
517 |
|
518 |
|
519 |
+
@lru_cache(maxsize=10)
def _get_moonshine_model(
    repo_id: str, decoding_method: str, num_active_paths: int
) -> sherpa_onnx.OfflineRecognizer:
    """Create an offline recognizer for one of the Moonshine English models.

    Args:
      repo_id:
        Short model name: either ``"moonshine-tiny"`` or ``"moonshine-base"``.
        It is mapped to the corresponding int8 model repository below.
      decoding_method:
        Not used by this function. It is kept so that the signature matches
        the other model getters; it still takes part in the ``lru_cache`` key.
      num_active_paths:
        Not used by this function. Same remark as for ``decoding_method``.

    Returns:
      The recognizer built by ``sherpa_onnx.OfflineRecognizer.from_moonshine``.

    Raises:
      ValueError: If ``repo_id`` is not one of the supported short names.
    """
    full_repo_ids = {
        "moonshine-tiny": "csukuangfj/sherpa-onnx-moonshine-tiny-en-int8",
        "moonshine-base": "csukuangfj/sherpa-onnx-moonshine-base-en-int8",
    }

    # Validate with an explicit exception rather than ``assert``: asserts are
    # removed when Python runs with -O, and the previous assert also made the
    # ValueError branch unreachable in normal mode.
    if repo_id not in full_repo_ids:
        raise ValueError(f"Unknown repo_id: {repo_id}")

    full_repo_id = full_repo_ids[repo_id]

    def model_file(filename: str):
        # All model files are requested from the root (".") of the model repo.
        return _get_nn_model_filename(
            repo_id=full_repo_id,
            filename=filename,
            subfolder=".",
        )

    preprocessor = model_file("preprocess.onnx")
    encoder = model_file("encode.int8.onnx")
    uncached_decoder = model_file("uncached_decode.int8.onnx")
    cached_decoder = model_file("cached_decode.int8.onnx")

    tokens = _get_token_filename(
        repo_id=full_repo_id,
        subfolder=".",
        filename="tokens.txt",
    )

    return sherpa_onnx.OfflineRecognizer.from_moonshine(
        preprocessor=preprocessor,
        encoder=encoder,
        uncached_decoder=uncached_decoder,
        cached_decoder=cached_decoder,
        tokens=tokens,
        num_threads=2,
    )
|
572 |
+
|
573 |
+
|
574 |
@lru_cache(maxsize=10)
|
575 |
def _get_whisper_model(
|
576 |
repo_id: str, decoding_method: str, num_active_paths: int
|
|
|
1673 |
"whisper-tiny.en": _get_whisper_model,
|
1674 |
"whisper-base.en": _get_whisper_model,
|
1675 |
"whisper-small.en": _get_whisper_model,
|
1676 |
+
"moonshine-tiny": _get_moonshine_model,
|
1677 |
+
"moonshine-base": _get_moonshine_model,
|
1678 |
"csukuangfj/sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000": _get_sherpa_onnx_nemo_ctc_models,
|
1679 |
"csukuangfj/sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000": _get_sherpa_onnx_nemo_transducer_models,
|
1680 |
# "whisper-medium.en": _get_whisper_model,
|