Spaces:

k2-fsa
/

automatic-speech-recognition

Running

App Files Community

csukuangfj committed on Jun 20, 2024

Commit

b11b38b

1 Parent(s): 24c2ef7

Add Thai

Browse files

Files changed (6) hide show

examples.py +24 -0
model.py +52 -0
test_wavs/thai/0.wav +0 -0
test_wavs/thai/1.wav +0 -0
test_wavs/thai/2.wav +0 -0
test_wavs/thai/trans.txt +3 -0

examples.py CHANGED Viewed

@@ -72,6 +72,14 @@ examples = [
         "No",
         "./test_wavs/russian/russian-i-love-you.wav",
     ],
     [
         "Russian",
         "alphacep/vosk-model-ru",
@@ -437,4 +445,20 @@ examples = [
         "No",
         "./test_wavs/korean/3.wav",
     ],
 ]

         "No",
         "./test_wavs/russian/russian-i-love-you.wav",
     ],
+    [
+        "Thai",
+        "yfyeung/icefall-asr-gigaspeech2-th-zipformer-2024-06-20",
+        "greedy_search",
+        4,
+        "No",
+        "./test_wavs/thai/0.wav",
+    ],
     [
         "Russian",
         "alphacep/vosk-model-ru",
         "No",
         "./test_wavs/korean/3.wav",
     ],
+    [
+        "Thai",
+        "yfyeung/icefall-asr-gigaspeech2-th-zipformer-2024-06-20",
+        "greedy_search",
+        4,
+        "No",
+        "./test_wavs/thai/1.wav",
+    ],
+    [
+        "Thai",
+        "yfyeung/icefall-asr-gigaspeech2-th-zipformer-2024-06-20",
+        "greedy_search",
+        4,
+        "No",
+        "./test_wavs/thai/2.wav",
+    ],
 ]

model.py CHANGED Viewed

@@ -212,6 +212,10 @@ def get_pretrained_model(
         return korean_models[repo_id](
             repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
         )
     else:
         raise ValueError(f"Unsupported repo_id: {repo_id}")
@@ -293,6 +297,48 @@ def _get_aishell2_pretrained_model(
     return recognizer
 @lru_cache(maxsize=10)
 def _get_zrjin_cantonese_pre_trained_model(
     repo_id: str, decoding_method: str, num_active_paths: int
@@ -1365,6 +1411,10 @@ korean_models = {
     "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": _get_streaming_zipformer_pre_trained_model,
 }
 all_models = {
     **chinese_models,
@@ -1379,6 +1429,7 @@ all_models = {
     **french_models,
     **russian_models,
     **korean_models,
 }
 language_to_models = {
@@ -1395,4 +1446,5 @@ language_to_models = {
     "French": list(french_models.keys()),
     "Russian": list(russian_models.keys()),
     "Korean": list(korean_models.keys()),
 }

         return korean_models[repo_id](
             repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
         )
+    elif repo_id in thai_models:
+        return thai_models[repo_id](
+            repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
+        )
     else:
         raise ValueError(f"Unsupported repo_id: {repo_id}")
     return recognizer
+@lru_cache(maxsize=10)
+def _get_yifan_thai_pretrained_model(
+    repo_id: str, decoding_method: str, num_active_paths: int
+) -> sherpa_onnx.OfflineRecognizer:
+    """Build the offline transducer recognizer for the Thai zipformer model.
+
+    The result is memoized by ``lru_cache`` on the full argument tuple, so
+    each (repo_id, decoding_method, num_active_paths) combination is
+    constructed at most once while it stays in the cache.
+
+    Args:
+      repo_id: Model repository ID. Only
+        "yfyeung/icefall-asr-gigaspeech2-th-zipformer-2024-06-20" is accepted.
+      decoding_method: Forwarded to the recognizer as ``decoding_method``.
+      num_active_paths: Forwarded to the recognizer as ``max_active_paths``.
+        NOTE(review): presumably only consulted by beam-search style
+        decoding methods - confirm against the sherpa-onnx docs.
+
+    Returns:
+      The recognizer returned by
+      ``sherpa_onnx.OfflineRecognizer.from_transducer``.
+
+    Raises:
+      AssertionError: If ``repo_id`` is not the supported Thai repo.
+    """
+    assert repo_id in (
+        "yfyeung/icefall-asr-gigaspeech2-th-zipformer-2024-06-20",
+    ), repo_id
+    # Encoder and joiner use the int8 exports; the decoder file requested
+    # below is the non-int8 export.
+    encoder_model = _get_nn_model_filename(
+        repo_id=repo_id,
+        filename="encoder-epoch-12-avg-5.int8.onnx",
+        subfolder="exp",
+    )
+    decoder_model = _get_nn_model_filename(
+        repo_id=repo_id,
+        filename="decoder-epoch-12-avg-5.onnx",
+        subfolder="exp",
+    )
+    joiner_model = _get_nn_model_filename(
+        repo_id=repo_id,
+        filename="joiner-epoch-12-avg-5.int8.onnx",
+        subfolder="exp",
+    )
+    tokens = _get_token_filename(repo_id=repo_id, subfolder="data/lang_bpe_2000")
+    recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
+        tokens=tokens,
+        encoder=encoder_model,
+        decoder=decoder_model,
+        joiner=joiner_model,
+        num_threads=2,
+        sample_rate=16000,
+        feature_dim=80,
+        decoding_method=decoding_method,
+        # Pass the caller's beam size through; without this the argument is
+        # part of the cache key but the recognizer uses its built-in default.
+        max_active_paths=num_active_paths,
+    )
+    return recognizer
 @lru_cache(maxsize=10)
 def _get_zrjin_cantonese_pre_trained_model(
     repo_id: str, decoding_method: str, num_active_paths: int
     "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": _get_streaming_zipformer_pre_trained_model,
 }
+thai_models = {  # Thai repo ID -> loader function; looked up by the `repo_id in thai_models` dispatch branch
+    "yfyeung/icefall-asr-gigaspeech2-th-zipformer-2024-06-20": _get_yifan_thai_pretrained_model,
+}
 all_models = {
     **chinese_models,
     **french_models,
     **russian_models,
     **korean_models,
+    **thai_models,
 }
 language_to_models = {
     "French": list(french_models.keys()),
     "Russian": list(russian_models.keys()),
     "Korean": list(korean_models.keys()),
+    "Thai": list(thai_models.keys()),
 }

test_wavs/thai/0.wav ADDED Viewed

Binary file (144 kB). View file

test_wavs/thai/1.wav ADDED Viewed

Binary file (137 kB). View file

test_wavs/thai/2.wav ADDED Viewed

Binary file (316 kB). View file

test_wavs/thai/trans.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+0	ก็เดี๋ยวเกมในนัดต่อไปต้องไปเจอกับทางอินโดนีเซียนะครับ
+1	ก็ไม่ได้เน้นเรื่องของผลการแข่งขันอยู่แล้วครับเหมือนที่คาร์ลอสเซซาร์นั้นได้บอกไว้นะครับ
+2	เกมในเกมที่แล้วเนี่ยตอนพักครึ่งหลังเนี่ยเหมือนคาร์ลอสจะบอกว่าจริงจริงจะไม่ส่งมูฮัมหมัดลงด้วยซ้ําแล้วนะครับแต่ว่าเหมือนกับท้ายเกมเนี่ยส่งไปด้วยความมั่นใจแล้วโอ้โหประตูที่สาม