csukuangfj committed on
Commit
ebfb900
·
1 Parent(s): aa8a2cf

Add moonshine

Browse files
Files changed (1) hide show
  1. model.py +57 -0
model.py CHANGED
@@ -516,6 +516,61 @@ def _get_russian_pre_trained_model(
516
  return recognizer
517
 
518
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
  @lru_cache(maxsize=10)
520
  def _get_whisper_model(
521
  repo_id: str, decoding_method: str, num_active_paths: int
@@ -1618,6 +1673,8 @@ english_models = {
1618
  "whisper-tiny.en": _get_whisper_model,
1619
  "whisper-base.en": _get_whisper_model,
1620
  "whisper-small.en": _get_whisper_model,
 
 
1621
  "csukuangfj/sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000": _get_sherpa_onnx_nemo_ctc_models,
1622
  "csukuangfj/sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000": _get_sherpa_onnx_nemo_transducer_models,
1623
  # "whisper-medium.en": _get_whisper_model,
 
516
  return recognizer
517
 
518
 
519
@lru_cache(maxsize=10)
def _get_moonshine_model(
    repo_id: str, decoding_method: str, num_active_paths: int
) -> sherpa_onnx.OfflineRecognizer:
    """Build an offline recognizer for one of the Moonshine English models.

    Args:
      repo_id:
        Short model name. Must be "moonshine-tiny" or "moonshine-base".
      decoding_method:
        Not used by this loader. It is accepted so the function has the
        same signature as the other loaders registered next to it
        (e.g. ``_get_whisper_model``).
      num_active_paths:
        Not used by this loader; see ``decoding_method``.

    Returns:
      A ``sherpa_onnx.OfflineRecognizer`` created with
      ``OfflineRecognizer.from_moonshine``.

    Raises:
      ValueError: If ``repo_id`` is not one of the supported short names.
    """
    # Map the short, user-facing name to the repository holding the files.
    full_repo_ids = {
        "moonshine-tiny": "csukuangfj/sherpa-onnx-moonshine-tiny-en-int8",
        "moonshine-base": "csukuangfj/sherpa-onnx-moonshine-base-en-int8",
    }

    # Validate with an explicit exception rather than ``assert`` so that the
    # check (and the error type) does not depend on whether Python runs
    # with -O.
    try:
        full_repo_id = full_repo_ids[repo_id]
    except KeyError:
        raise ValueError(f"Unknown repo_id: {repo_id}") from None

    preprocessor = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename="preprocess.onnx",
        subfolder=".",
    )

    encoder = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename="encode.int8.onnx",
        subfolder=".",
    )

    uncached_decoder = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename="uncached_decode.int8.onnx",
        subfolder=".",
    )

    cached_decoder = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename="cached_decode.int8.onnx",
        subfolder=".",
    )

    tokens = _get_token_filename(
        repo_id=full_repo_id,
        subfolder=".",
        filename="tokens.txt",
    )

    recognizer = sherpa_onnx.OfflineRecognizer.from_moonshine(
        preprocessor=preprocessor,
        encoder=encoder,
        uncached_decoder=uncached_decoder,
        cached_decoder=cached_decoder,
        tokens=tokens,
        num_threads=2,
    )

    return recognizer
572
+
573
+
574
  @lru_cache(maxsize=10)
575
  def _get_whisper_model(
576
  repo_id: str, decoding_method: str, num_active_paths: int
 
1673
  "whisper-tiny.en": _get_whisper_model,
1674
  "whisper-base.en": _get_whisper_model,
1675
  "whisper-small.en": _get_whisper_model,
1676
+ "moonshine-tiny": _get_moonshine_model,
1677
+ "moonshine-base": _get_moonshine_model,
1678
  "csukuangfj/sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000": _get_sherpa_onnx_nemo_ctc_models,
1679
  "csukuangfj/sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000": _get_sherpa_onnx_nemo_transducer_models,
1680
  # "whisper-medium.en": _get_whisper_model,