Commit
·
4e478c6
1
Parent(s):
817f0f3
add updated gigaspeech model
Browse files
model.py
CHANGED
@@ -880,6 +880,51 @@ def _get_japanese_pre_trained_model(
|
|
880 |
return recognizer
|
881 |
|
882 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
883 |
@lru_cache(maxsize=10)
|
884 |
def _get_paraformer_zh_pre_trained_model(
|
885 |
repo_id: str,
|
@@ -971,6 +1016,7 @@ english_models = {
|
|
971 |
"whisper-base.en": _get_whisper_model,
|
972 |
"whisper-small.en": _get_whisper_model,
|
973 |
# "whisper-medium.en": _get_whisper_model,
|
|
|
974 |
"wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
|
975 |
"yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04": _get_english_model, # noqa
|
976 |
"yfyeung/icefall-asr-finetune-mux-pruned_transducer_stateless7-2023-05-19": _get_english_model, # noqa
|
|
|
880 |
return recognizer
|
881 |
|
882 |
|
883 |
+
@lru_cache(maxsize=10)
def _get_gigaspeech_pre_trained_model_onnx(
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
) -> sherpa_onnx.OfflineRecognizer:
    """Create the sherpa-onnx offline transducer recognizer for GigaSpeech.

    The result is memoized by ``lru_cache`` (up to 10 distinct argument
    combinations), so calling again with the same arguments returns the
    previously built recognizer.

    Args:
      repo_id:
        Model repository identifier. Only
        ``yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17`` is accepted.
      decoding_method:
        Forwarded unchanged as ``decoding_method`` to
        ``OfflineRecognizer.from_transducer``.
      num_active_paths:
        Forwarded as ``max_active_paths`` to
        ``OfflineRecognizer.from_transducer``.

    Returns:
      The recognizer built from the encoder, decoder, joiner and token
      files of ``repo_id``.

    Raises:
      AssertionError: If ``repo_id`` is not the supported repository.
    """
    supported_repo_ids = [
        "yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17",
    ]
    assert repo_id in supported_repo_ids, repo_id

    # Resolve the three transducer components in encoder -> decoder -> joiner
    # order; all of them live in the "exp" subfolder of the repository.
    # NOTE(review): _get_nn_model_filename presumably fetches the file and
    # returns a local path -- confirm against its definition.
    encoder_path, decoder_path, joiner_path = [
        _get_nn_model_filename(
            repo_id=repo_id,
            filename=onnx_name,
            subfolder="exp",
        )
        for onnx_name in (
            "encoder-epoch-30-avg-9.onnx",
            "decoder-epoch-30-avg-9.onnx",
            "joiner-epoch-30-avg-9.onnx",
        )
    ]

    tokens_path = _get_token_filename(
        repo_id=repo_id,
        subfolder="data/lang_bpe_500",
    )

    return sherpa_onnx.OfflineRecognizer.from_transducer(
        tokens=tokens_path,
        encoder=encoder_path,
        decoder=decoder_path,
        joiner=joiner_path,
        num_threads=2,
        sample_rate=16000,
        feature_dim=80,
        decoding_method=decoding_method,
        max_active_paths=num_active_paths,
    )
|
926 |
+
|
927 |
+
|
928 |
@lru_cache(maxsize=10)
|
929 |
def _get_paraformer_zh_pre_trained_model(
|
930 |
repo_id: str,
|
|
|
1016 |
"whisper-base.en": _get_whisper_model,
|
1017 |
"whisper-small.en": _get_whisper_model,
|
1018 |
# "whisper-medium.en": _get_whisper_model,
|
1019 |
+
"yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17": _get_gigaspeech_pre_trained_model_onnx, # noqa
|
1020 |
"wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
|
1021 |
"yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04": _get_english_model, # noqa
|
1022 |
"yfyeung/icefall-asr-finetune-mux-pruned_transducer_stateless7-2023-05-19": _get_english_model, # noqa
|