samikhan121 committed on
Commit
e3dd340
·
verified ·
1 Parent(s): 0cc161e

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +1 -3
  2. tokenizer_config.json +5 -27
  3. vocab.json +0 -10
added_tokens.json CHANGED
@@ -1,5 +1,3 @@
1
  {
2
- "ড়": 84,
3
- "ঢ়": 85,
4
- "য়": 86
5
  }
 
1
  {
2
+ "<unk>": 74
 
 
3
  }
tokenizer_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "add_blank": false,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "6",
@@ -16,39 +16,17 @@
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
19
- },
20
- "84": {
21
- "content": "ড়",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": false
27
- },
28
- "85": {
29
- "content": "ঢ়",
30
- "lstrip": false,
31
- "normalized": false,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": false
35
- },
36
- "86": {
37
- "content": "য়",
38
- "lstrip": false,
39
- "normalized": false,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": false
43
  }
44
  },
45
  "clean_up_tokenization_spaces": true,
 
46
  "is_uroman": false,
47
  "language": "ben",
48
  "model_max_length": 1000000000000000019884624838656,
49
- "normalize": false,
50
  "pad_token": "6",
51
  "phonemize": false,
52
  "tokenizer_class": "VitsTokenizer",
53
- "unk_token": "<unk>"
 
54
  }
 
1
  {
2
+ "add_blank": true,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "6",
 
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  },
21
  "clean_up_tokenization_spaces": true,
22
+ "extra_special_tokens": {},
23
  "is_uroman": false,
24
  "language": "ben",
25
  "model_max_length": 1000000000000000019884624838656,
26
+ "normalize": true,
27
  "pad_token": "6",
28
  "phonemize": false,
29
  "tokenizer_class": "VitsTokenizer",
30
+ "unk_token": "<unk>",
31
+ "verbose": false
32
  }
vocab.json CHANGED
@@ -72,15 +72,5 @@
72
  "ৌ": 44,
73
  "্": 36,
74
  "ৎ": 52,
75
- "০": 74,
76
- "১": 75,
77
- "২": 76,
78
- "৩": 77,
79
- "৪": 78,
80
- "৫": 79,
81
- "৬": 80,
82
- "৭": 81,
83
- "৮": 82,
84
- "৯": 83,
85
  "—": 32
86
  }
 
72
  "ৌ": 44,
73
  "্": 36,
74
  "ৎ": 52,
 
 
 
 
 
 
 
 
 
 
75
  "—": 32
76
  }