csukuangfj committed on
Commit
24c2ef7
·
1 Parent(s): 8b52167

add korean

Browse files
examples.py CHANGED
@@ -56,6 +56,14 @@ examples = [
56
  "Yes",
57
  "./test_wavs/paraformer-zh/四川话.wav",
58
  ],
 
 
 
 
 
 
 
 
59
  [
60
  "Russian",
61
  "alphacep/vosk-model-ru",
@@ -405,4 +413,28 @@ examples = [
405
  "No",
406
  "./test_wavs/french/common_voice_fr_27024649.wav",
407
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
  ]
 
56
  "Yes",
57
  "./test_wavs/paraformer-zh/四川话.wav",
58
  ],
59
+ [
60
+ "Korean",
61
+ "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16",
62
+ "greedy_search",
63
+ 4,
64
+ "No",
65
+ "./test_wavs/korean/0.wav",
66
+ ],
67
  [
68
  "Russian",
69
  "alphacep/vosk-model-ru",
 
413
  "No",
414
  "./test_wavs/french/common_voice_fr_27024649.wav",
415
  ],
416
+ [
417
+ "Korean",
418
+ "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16",
419
+ "greedy_search",
420
+ 4,
421
+ "No",
422
+ "./test_wavs/korean/1.wav",
423
+ ],
424
+ [
425
+ "Korean",
426
+ "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16",
427
+ "greedy_search",
428
+ 4,
429
+ "No",
430
+ "./test_wavs/korean/2.wav",
431
+ ],
432
+ [
433
+ "Korean",
434
+ "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16",
435
+ "greedy_search",
436
+ 4,
437
+ "No",
438
+ "./test_wavs/korean/3.wav",
439
+ ],
440
  ]
model.py CHANGED
@@ -208,6 +208,10 @@ def get_pretrained_model(
208
  return russian_models[repo_id](
209
  repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
210
  )
 
 
 
 
211
  else:
212
  raise ValueError(f"Unsupported repo_id: {repo_id}")
213
 
@@ -894,6 +898,7 @@ def _get_streaming_zipformer_pre_trained_model(
894
  ) -> sherpa_onnx.OnlineRecognizer:
895
  assert repo_id in [
896
  "csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
 
897
  ], repo_id
898
 
899
  encoder_model = _get_nn_model_filename(
@@ -1356,6 +1361,10 @@ cantonese_models = {
1356
  "zrjin/icefall-asr-mdcc-zipformer-2024-03-11": _get_zrjin_cantonese_pre_trained_model,
1357
  }
1358
 
 
 
 
 
1359
 
1360
  all_models = {
1361
  **chinese_models,
@@ -1369,6 +1378,7 @@ all_models = {
1369
  **german_models,
1370
  **french_models,
1371
  **russian_models,
 
1372
  }
1373
 
1374
  language_to_models = {
@@ -1384,4 +1394,5 @@ language_to_models = {
1384
  "German": list(german_models.keys()),
1385
  "French": list(french_models.keys()),
1386
  "Russian": list(russian_models.keys()),
 
1387
  }
 
208
  return russian_models[repo_id](
209
  repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
210
  )
211
+ elif repo_id in korean_models:
212
+ return korean_models[repo_id](
213
+ repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
214
+ )
215
  else:
216
  raise ValueError(f"Unsupported repo_id: {repo_id}")
217
 
 
898
  ) -> sherpa_onnx.OnlineRecognizer:
899
  assert repo_id in [
900
  "csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
901
+ "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16",
902
  ], repo_id
903
 
904
  encoder_model = _get_nn_model_filename(
 
1361
  "zrjin/icefall-asr-mdcc-zipformer-2024-03-11": _get_zrjin_cantonese_pre_trained_model,
1362
  }
1363
 
1364
+ korean_models = {
1365
+ "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": _get_streaming_zipformer_pre_trained_model,
1366
+ }
1367
+
1368
 
1369
  all_models = {
1370
  **chinese_models,
 
1378
  **german_models,
1379
  **french_models,
1380
  **russian_models,
1381
+ **korean_models,
1382
  }
1383
 
1384
  language_to_models = {
 
1394
  "German": list(german_models.keys()),
1395
  "French": list(french_models.keys()),
1396
  "Russian": list(russian_models.keys()),
1397
+ "Korean": list(korean_models.keys()),
1398
  }
test_wavs/korean/0.wav ADDED
Binary file (113 kB). View file
 
test_wavs/korean/1.wav ADDED
Binary file (109 kB). View file
 
test_wavs/korean/2.wav ADDED
Binary file (212 kB). View file
 
test_wavs/korean/3.wav ADDED
Binary file (85.8 kB). View file