SiRoZaRuPa
committed on
Commit
•
6a558fa
1
Parent(s):
2479a16
Upload tokenizer
Browse files
- added_tokens.json +2 -3
- special_tokens_map.json +1 -1
- tokenizer_config.json +3 -10
added_tokens.json
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
{
|
2 |
"</s>": 3202,
|
3 |
-
"<pad>": 3203,
|
4 |
"<s>": 3201,
|
5 |
-
"[無音]": 3205,
|
6 |
-
"[雑音]": 3204
|
7 |
}
|
|
|
1 |
{
|
2 |
"</s>": 3202,
|
|
|
3 |
"<s>": 3201,
|
4 |
+
"[無音]": 3204,
|
5 |
+
"[雑音]": 3203
|
6 |
}
|
special_tokens_map.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"bos_token": "<s>",
|
3 |
"eos_token": "</s>",
|
4 |
-
"pad_token": "<pad>",
|
5 |
"unk_token": "<unk>"
|
6 |
}
|
|
|
1 |
{
|
2 |
"bos_token": "<s>",
|
3 |
"eos_token": "</s>",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
"unk_token": "<unk>"
|
6 |
}
|
tokenizer_config.json
CHANGED
@@ -11817,14 +11817,6 @@
|
|
11817 |
"special": true
|
11818 |
},
|
11819 |
"3203": {
|
11820 |
-
"content": "<pad>",
|
11821 |
-
"lstrip": false,
|
11822 |
-
"normalized": false,
|
11823 |
-
"rstrip": false,
|
11824 |
-
"single_word": false,
|
11825 |
-
"special": true
|
11826 |
-
},
|
11827 |
-
"3204": {
|
11828 |
"content": "[雑音]",
|
11829 |
"lstrip": false,
|
11830 |
"normalized": false,
|
@@ -11832,7 +11824,7 @@
|
|
11832 |
"single_word": false,
|
11833 |
"special": false
|
11834 |
},
|
11835 |
-
"3205": {
|
11836 |
"content": "[無音]",
|
11837 |
"lstrip": false,
|
11838 |
"normalized": false,
|
@@ -11846,10 +11838,11 @@
|
|
11846 |
"do_lower_case": false,
|
11847 |
"eos_token": "</s>",
|
11848 |
"model_max_length": 1000000000000000019884624838656,
|
11849 |
-
"pad_token": "<pad>",
|
11850 |
"replace_word_delimiter_char": " ",
|
11851 |
"target_lang": null,
|
11852 |
"tokenizer_class": "Wav2Vec2CTCTokenizer",
|
11853 |
"unk_token": "<unk>",
|
|
|
11854 |
"word_delimiter_token": "|"
|
11855 |
}
|
|
|
11817 |
"special": true
|
11818 |
},
|
11819 |
"3203": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11820 |
"content": "[雑音]",
|
11821 |
"lstrip": false,
|
11822 |
"normalized": false,
|
|
|
11824 |
"single_word": false,
|
11825 |
"special": false
|
11826 |
},
|
11827 |
+
"3204": {
|
11828 |
"content": "[無音]",
|
11829 |
"lstrip": false,
|
11830 |
"normalized": false,
|
|
|
11838 |
"do_lower_case": false,
|
11839 |
"eos_token": "</s>",
|
11840 |
"model_max_length": 1000000000000000019884624838656,
|
11841 |
+
"pad_token": "[PAD]",
|
11842 |
"replace_word_delimiter_char": " ",
|
11843 |
"target_lang": null,
|
11844 |
"tokenizer_class": "Wav2Vec2CTCTokenizer",
|
11845 |
"unk_token": "<unk>",
|
11846 |
+
"use_fast": false,
|
11847 |
"word_delimiter_token": "|"
|
11848 |
}
|