csukuangfj
committed on
Commit
·
5e2845b
1
Parent(s):
b11b38b
add a new korean model
Browse files
- examples.py +4 -4
- model.py +41 -0
examples.py
CHANGED
@@ -58,7 +58,7 @@ examples = [
|
|
58 |
],
|
59 |
[
|
60 |
"Korean",
|
61 |
-
"k2-fsa/sherpa-onnx-
|
62 |
"greedy_search",
|
63 |
4,
|
64 |
"No",
|
@@ -423,7 +423,7 @@ examples = [
|
|
423 |
],
|
424 |
[
|
425 |
"Korean",
|
426 |
-
"k2-fsa/sherpa-onnx-
|
427 |
"greedy_search",
|
428 |
4,
|
429 |
"No",
|
@@ -431,7 +431,7 @@ examples = [
|
|
431 |
],
|
432 |
[
|
433 |
"Korean",
|
434 |
-
"k2-fsa/sherpa-onnx-
|
435 |
"greedy_search",
|
436 |
4,
|
437 |
"No",
|
@@ -439,7 +439,7 @@ examples = [
|
|
439 |
],
|
440 |
[
|
441 |
"Korean",
|
442 |
-
"k2-fsa/sherpa-onnx-
|
443 |
"greedy_search",
|
444 |
4,
|
445 |
"No",
|
|
|
58 |
],
|
59 |
[
|
60 |
"Korean",
|
61 |
+
"k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24",
|
62 |
"greedy_search",
|
63 |
4,
|
64 |
"No",
|
|
|
423 |
],
|
424 |
[
|
425 |
"Korean",
|
426 |
+
"k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24",
|
427 |
"greedy_search",
|
428 |
4,
|
429 |
"No",
|
|
|
431 |
],
|
432 |
[
|
433 |
"Korean",
|
434 |
+
"k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24",
|
435 |
"greedy_search",
|
436 |
4,
|
437 |
"No",
|
|
|
439 |
],
|
440 |
[
|
441 |
"Korean",
|
442 |
+
"k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24",
|
443 |
"greedy_search",
|
444 |
4,
|
445 |
"No",
|
model.py
CHANGED
@@ -297,6 +297,46 @@ def _get_aishell2_pretrained_model(
|
|
297 |
return recognizer
|
298 |
|
299 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
@lru_cache(maxsize=10)
|
301 |
def _get_yifan_thai_pretrained_model(
|
302 |
repo_id: str, decoding_method: str, num_active_paths: int
|
@@ -1409,6 +1449,7 @@ cantonese_models = {
|
|
1409 |
|
1410 |
korean_models = {
|
1411 |
"k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": _get_streaming_zipformer_pre_trained_model,
|
|
|
1412 |
}
|
1413 |
|
1414 |
thai_models = {
|
|
|
297 |
return recognizer
|
298 |
|
299 |
|
300 |
+
@lru_cache(maxsize=10)
|
301 |
+
def _get_offline_pre_trained_model(
|
302 |
+
repo_id: str, decoding_method: str, num_active_paths: int
|
303 |
+
) -> sherpa_onnx.OfflineRecognizer:
|
304 |
+
assert repo_id in ("k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24",), repo_id
|
305 |
+
|
306 |
+
encoder_model = _get_nn_model_filename(
|
307 |
+
repo_id=repo_id,
|
308 |
+
filename="encoder-epoch-99-avg-1.int8.onnx",
|
309 |
+
subfolder=".",
|
310 |
+
)
|
311 |
+
|
312 |
+
decoder_model = _get_nn_model_filename(
|
313 |
+
repo_id=repo_id,
|
314 |
+
filename="decoder-epoch-99-avg-1.onnx",
|
315 |
+
subfolder=".",
|
316 |
+
)
|
317 |
+
|
318 |
+
joiner_model = _get_nn_model_filename(
|
319 |
+
repo_id=repo_id,
|
320 |
+
filename="joiner-epoch-99-avg-1.onnx",
|
321 |
+
subfolder=".",
|
322 |
+
)
|
323 |
+
|
324 |
+
tokens = _get_token_filename(repo_id=repo_id, subfolder=".")
|
325 |
+
|
326 |
+
recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
|
327 |
+
tokens=tokens,
|
328 |
+
encoder=encoder_model,
|
329 |
+
decoder=decoder_model,
|
330 |
+
joiner=joiner_model,
|
331 |
+
num_threads=2,
|
332 |
+
sample_rate=16000,
|
333 |
+
feature_dim=80,
|
334 |
+
decoding_method=decoding_method,
|
335 |
+
)
|
336 |
+
|
337 |
+
return recognizer
|
338 |
+
|
339 |
+
|
340 |
@lru_cache(maxsize=10)
|
341 |
def _get_yifan_thai_pretrained_model(
|
342 |
repo_id: str, decoding_method: str, num_active_paths: int
|
|
|
1449 |
|
1450 |
korean_models = {
|
1451 |
"k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": _get_streaming_zipformer_pre_trained_model,
|
1452 |
+
"k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24": _get_offline_pre_trained_model,
|
1453 |
}
|
1454 |
|
1455 |
thai_models = {
|