Commit · 8c098aa
1 Parent(s): 697a4a4
update models
Browse files
model.py CHANGED
@@ -794,7 +794,7 @@ def _get_french_pre_trained_model(
|
|
794 |
repo_id: str,
|
795 |
decoding_method: str,
|
796 |
num_active_paths: int,
|
797 |
-
) -> sherpa_onnx.
|
798 |
assert repo_id in [
|
799 |
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
|
800 |
], repo_id
|
@@ -834,6 +834,51 @@ def _get_french_pre_trained_model(
|
|
834 |
return recognizer
|
835 |
|
836 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
837 |
@lru_cache(maxsize=10)
|
838 |
def _get_japanese_pre_trained_model(
|
839 |
repo_id: str,
|
@@ -1007,6 +1052,8 @@ def _get_paraformer_pre_trained_model(
|
|
1007 |
) -> sherpa_onnx.OfflineRecognizer:
|
1008 |
assert repo_id in [
|
1009 |
"csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28",
|
|
|
|
|
1010 |
"csukuangfj/sherpa-onnx-paraformer-trilingual-zh-cantonese-en",
|
1011 |
], repo_id
|
1012 |
|
@@ -1129,10 +1176,9 @@ def _get_multi_zh_hans_pre_trained_model(
|
|
1129 |
|
1130 |
|
1131 |
chinese_models = {
|
1132 |
-
"csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28": _get_paraformer_pre_trained_model,
|
1133 |
-
"csukuangfj/sherpa-onnx-paraformer-zh-small-2024-03-09": _get_paraformer_pre_trained_model,
|
1134 |
"csukuangfj/sherpa-onnx-paraformer-zh-2024-03-09": _get_paraformer_pre_trained_model,
|
1135 |
"luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2": _get_wenetspeech_pre_trained_model, # noqa
|
|
|
1136 |
"zrjin/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2": _get_multi_zh_hans_pre_trained_model, # noqa
|
1137 |
"zrjin/icefall-asr-aishell-zipformer-large-2023-10-24": _get_aishell_pre_trained_model, # noqa
|
1138 |
"zrjin/icefall-asr-aishell-zipformer-small-2023-10-24": _get_aishell_pre_trained_model, # noqa
|
@@ -1171,6 +1217,8 @@ english_models = {
|
|
1171 |
}
|
1172 |
|
1173 |
chinese_english_mixed_models = {
|
|
|
|
|
1174 |
"ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh": _get_chinese_english_mixed_model,
|
1175 |
"luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_chinese_english_mixed_model, # noqa
|
1176 |
}
|
@@ -1203,8 +1251,8 @@ russian_models = {
|
|
1203 |
}
|
1204 |
|
1205 |
chinese_cantonese_english_models = {
|
1206 |
-
"csukuangfj/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en": _get_streaming_paraformer_zh_yue_en_pre_trained_model,
|
1207 |
"csukuangfj/sherpa-onnx-paraformer-trilingual-zh-cantonese-en": _get_paraformer_pre_trained_model,
|
|
|
1208 |
}
|
1209 |
|
1210 |
|
|
|
794 |
repo_id: str,
|
795 |
decoding_method: str,
|
796 |
num_active_paths: int,
|
797 |
+
) -> sherpa_onnx.OnlineRecognizer:
|
798 |
assert repo_id in [
|
799 |
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
|
800 |
], repo_id
|
|
|
834 |
return recognizer
|
835 |
|
836 |
|
837 |
+
@lru_cache(maxsize=10)
|
838 |
+
def _get_streaming_zipformer_pre_trained_model(
|
839 |
+
repo_id: str,
|
840 |
+
decoding_method: str,
|
841 |
+
num_active_paths: int,
|
842 |
+
) -> sherpa_onnx.OnlineRecognizer:
|
843 |
+
assert repo_id in [
|
844 |
+
"csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
|
845 |
+
], repo_id
|
846 |
+
|
847 |
+
encoder_model = _get_nn_model_filename(
|
848 |
+
repo_id=repo_id,
|
849 |
+
filename="encoder-epoch-99-avg-1.onnx",
|
850 |
+
subfolder=".",
|
851 |
+
)
|
852 |
+
|
853 |
+
decoder_model = _get_nn_model_filename(
|
854 |
+
repo_id=repo_id,
|
855 |
+
filename="decoder-epoch-99-avg-1.onnx",
|
856 |
+
subfolder=".",
|
857 |
+
)
|
858 |
+
|
859 |
+
joiner_model = _get_nn_model_filename(
|
860 |
+
repo_id=repo_id,
|
861 |
+
filename="joiner-epoch-99-avg-1.onnx",
|
862 |
+
subfolder=".",
|
863 |
+
)
|
864 |
+
|
865 |
+
tokens = _get_token_filename(repo_id=repo_id, subfolder=".")
|
866 |
+
|
867 |
+
recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
|
868 |
+
tokens=tokens,
|
869 |
+
encoder=encoder_model,
|
870 |
+
decoder=decoder_model,
|
871 |
+
joiner=joiner_model,
|
872 |
+
num_threads=2,
|
873 |
+
sample_rate=16000,
|
874 |
+
feature_dim=80,
|
875 |
+
decoding_method=decoding_method,
|
876 |
+
max_active_paths=num_active_paths,
|
877 |
+
)
|
878 |
+
|
879 |
+
return recognizer
|
880 |
+
|
881 |
+
|
882 |
@lru_cache(maxsize=10)
|
883 |
def _get_japanese_pre_trained_model(
|
884 |
repo_id: str,
|
|
|
1052 |
) -> sherpa_onnx.OfflineRecognizer:
|
1053 |
assert repo_id in [
|
1054 |
"csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28",
|
1055 |
+
"csukuangfj/sherpa-onnx-paraformer-zh-2024-03-09",
|
1056 |
+
"csukuangfj/sherpa-onnx-paraformer-zh-small-2024-03-09",
|
1057 |
"csukuangfj/sherpa-onnx-paraformer-trilingual-zh-cantonese-en",
|
1058 |
], repo_id
|
1059 |
|
|
|
1176 |
|
1177 |
|
1178 |
chinese_models = {
|
|
|
|
|
1179 |
"csukuangfj/sherpa-onnx-paraformer-zh-2024-03-09": _get_paraformer_pre_trained_model,
|
1180 |
"luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2": _get_wenetspeech_pre_trained_model, # noqa
|
1181 |
+
"csukuangfj/sherpa-onnx-paraformer-zh-small-2024-03-09": _get_paraformer_pre_trained_model,
|
1182 |
"zrjin/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2": _get_multi_zh_hans_pre_trained_model, # noqa
|
1183 |
"zrjin/icefall-asr-aishell-zipformer-large-2023-10-24": _get_aishell_pre_trained_model, # noqa
|
1184 |
"zrjin/icefall-asr-aishell-zipformer-small-2023-10-24": _get_aishell_pre_trained_model, # noqa
|
|
|
1217 |
}
|
1218 |
|
1219 |
chinese_english_mixed_models = {
|
1220 |
+
"csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": _get_streaming_zipformer_pre_trained_model,
|
1221 |
+
"csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28": _get_paraformer_pre_trained_model,
|
1222 |
"ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh": _get_chinese_english_mixed_model,
|
1223 |
"luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_chinese_english_mixed_model, # noqa
|
1224 |
}
|
|
|
1251 |
}
|
1252 |
|
1253 |
chinese_cantonese_english_models = {
|
|
|
1254 |
"csukuangfj/sherpa-onnx-paraformer-trilingual-zh-cantonese-en": _get_paraformer_pre_trained_model,
|
1255 |
+
"csukuangfj/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en": _get_streaming_paraformer_zh_yue_en_pre_trained_model,
|
1256 |
}
|
1257 |
|
1258 |
|