khulnasoft committed on
Commit
ce6b333
·
verified ·
1 Parent(s): d34a4ab

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +142 -13
tokenizer_config.json CHANGED
@@ -1,6 +1,136 @@
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "bos_token": {
5
  "__type": "AddedToken",
6
  "content": "<|begin▁of▁sentence|>",
@@ -8,8 +138,7 @@
8
  "normalized": true,
9
  "rstrip": false,
10
  "single_word": false
11
- },
12
- "clean_up_tokenization_spaces": false,
13
  "eos_token": {
14
  "__type": "AddedToken",
15
  "content": "<|end▁of▁sentence|>",
@@ -17,18 +146,18 @@
17
  "normalized": true,
18
  "rstrip": false,
19
  "single_word": false
 
20
  },
21
- "legacy": true,
 
 
 
 
 
22
  "model_max_length": 16384,
23
- "pad_token": {
24
- "__type": "AddedToken",
25
- "content": "<|end▁of▁sentence|>",
26
- "lstrip": false,
27
- "normalized": true,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- "sp_model_kwargs": {},
32
  "unk_token": null,
33
- "tokenizer_class": "LlamaTokenizerFast"
34
  }
 
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
+
5
+ "added_tokens_decoder": {
6
+ "100000": {
7
+ "content": "<|begin▁of▁sentence|>",
8
+ "lstrip": false,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "100001": {
15
+ "content": "<|end▁of▁sentence|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "100002": {
23
+ "content": "ø",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": false
29
+ },
30
+ "100003": {
31
+ "content": "ö",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": false
37
+ },
38
+ "100004": {
39
+ "content": "ú",
40
+ "lstrip": false,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": false
45
+ },
46
+ "100005": {
47
+ "content": "ÿ",
48
+ "lstrip": false,
49
+ "normalized": true,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": false
53
+ },
54
+ "100006": {
55
+ "content": "õ",
56
+ "lstrip": false,
57
+ "normalized": true,
58
+ "rstrip": false,
59
+ "single_word": false,
60
+ "special": false
61
+ },
62
+ "100007": {
63
+ "content": "÷",
64
+ "lstrip": false,
65
+ "normalized": true,
66
+ "rstrip": false,
67
+ "single_word": false,
68
+ "special": false
69
+ },
70
+ "100008": {
71
+ "content": "û",
72
+ "lstrip": false,
73
+ "normalized": true,
74
+ "rstrip": false,
75
+ "single_word": false,
76
+ "special": false
77
+ },
78
+ "100009": {
79
+ "content": "ý",
80
+ "lstrip": false,
81
+ "normalized": true,
82
+ "rstrip": false,
83
+ "single_word": false,
84
+ "special": false
85
+ },
86
+ "100010": {
87
+ "content": "À",
88
+ "lstrip": false,
89
+ "normalized": true,
90
+ "rstrip": false,
91
+ "single_word": false,
92
+ "special": false
93
+ },
94
+ "100011": {
95
+ "content": "ù",
96
+ "lstrip": false,
97
+ "normalized": true,
98
+ "rstrip": false,
99
+ "single_word": false,
100
+ "special": false
101
+ },
102
+ "100012": {
103
+ "content": "Á",
104
+ "lstrip": false,
105
+ "normalized": true,
106
+ "rstrip": false,
107
+ "single_word": false,
108
+ "special": false
109
+ },
110
+ "100013": {
111
+ "content": "þ",
112
+ "lstrip": false,
113
+ "normalized": true,
114
+ "rstrip": false,
115
+ "single_word": false,
116
+ "special": false
117
+ },
118
+ "100014": {
119
+ "content": "ü",
120
+ "lstrip": false,
121
+ "normalized": true,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": false
125
+ },
126
+ "100015": {
127
+ "content": "<image_placeholder>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": true
133
+ },
134
  "bos_token": {
135
  "__type": "AddedToken",
136
  "content": "<|begin▁of▁sentence|>",
 
138
  "normalized": true,
139
  "rstrip": false,
140
  "single_word": false
141
+ },
 
142
  "eos_token": {
143
  "__type": "AddedToken",
144
  "content": "<|end▁of▁sentence|>",
 
146
  "normalized": true,
147
  "rstrip": false,
148
  "single_word": false
149
+ }
150
  },
151
+ "additional_special_tokens": [
152
+ "<image_placeholder>"
153
+ ],
154
+ "bos_token": "<|begin▁of▁sentence|>",
155
+ "clean_up_tokenization_spaces": false,
156
+ "eos_token": "<|end▁of▁sentence|>",
157
  "model_max_length": 16384,
158
+ "pad_token": null,
159
+ "processor_class": "VLChatProcessor",
160
+ "tokenizer_class": "LlamaTokenizer",
 
 
 
 
 
 
161
  "unk_token": null,
162
+ "use_default_system_prompt": false
163
  }