SiRoZaRuPa committed
Commit 6a558fa • 1 Parent(s): 2479a16

Upload tokenizer

added_tokens.json CHANGED
@@ -1,7 +1,6 @@
 {
   "</s>": 3202,
-  "<pad>": 3203,
   "<s>": 3201,
-  "[無音]": 3205,
-  "[雑音]": 3204
+  "[無音]": 3204,
+  "[雑音]": 3203
 }
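
Not part of the commit itself, but the net effect of this file's change is easy to state: the <pad> entry is dropped, and [雑音] and [無音] slide down into the freed ids 3203 and 3204, keeping the added-token ids contiguous after the base vocabulary. A minimal Python check against a local copy of the updated file (the file path is assumed, not given on this page):

```python
import json

# Load the updated added_tokens.json from a local checkout of the repo
# (the path is hypothetical; adjust to wherever the files were downloaded).
with open("added_tokens.json", encoding="utf-8") as f:
    added = json.load(f)

# After this commit, <pad> is gone and the two noise/silence tokens
# occupy the freed id range: [雑音] -> 3203, [無音] -> 3204.
assert "<pad>" not in added
assert added["[雑音]"] == 3203
assert added["[無音]"] == 3204
```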
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
 {
   "bos_token": "<s>",
   "eos_token": "</s>",
-  "pad_token": "<pad>",
+  "pad_token": "[PAD]",
   "unk_token": "<unk>"
 }
tokenizer_config.json CHANGED
@@ -11817,14 +11817,6 @@
     "special": true
   },
   "3203": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false,
-    "special": true
-  },
-  "3204": {
     "content": "[雑音]",
     "lstrip": false,
     "normalized": false,
@@ -11832,7 +11824,7 @@
     "single_word": false,
     "special": false
   },
-  "3205": {
+  "3204": {
     "content": "[無音]",
     "lstrip": false,
     "normalized": false,
@@ -11846,10 +11838,11 @@
   "do_lower_case": false,
   "eos_token": "</s>",
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<pad>",
+  "pad_token": "[PAD]",
   "replace_word_delimiter_char": " ",
   "target_lang": null,
   "tokenizer_class": "Wav2Vec2CTCTokenizer",
   "unk_token": "<unk>",
+  "use_fast": false,
   "word_delimiter_token": "|"
 }
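
Taken together, the three files now agree that padding uses [PAD] and that the slow tokenizer class should be used. A minimal loading sketch under those assumptions; the repo id below is a placeholder, since the commit page does not name the repository:

```python
from transformers import AutoTokenizer

# Placeholder repo id; the commit page does not name the repository.
repo_id = "SiRoZaRuPa/<repo-name>"

# The new "use_fast": false entry in tokenizer_config.json selects the slow
# Wav2Vec2CTCTokenizer; passing use_fast=False here is equivalent.
tokenizer = AutoTokenizer.from_pretrained(repo_id, use_fast=False)

print(tokenizer.pad_token)                        # "[PAD]" after this commit
print(tokenizer.convert_tokens_to_ids("[雑音]"))  # 3203
print(tokenizer.convert_tokens_to_ids("[無音]"))  # 3204
```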