Upload folder using huggingface_hub
Browse files- tokenizer.json +1 -93
tokenizer.json
CHANGED
@@ -159,98 +159,6 @@
|
|
159 |
}
|
160 |
]
|
161 |
},
|
162 |
-
"post_processor": {
|
163 |
-
"type": "Sequence",
|
164 |
-
"processors": [
|
165 |
-
{
|
166 |
-
"type": "ByteLevel",
|
167 |
-
"add_prefix_space": true,
|
168 |
-
"trim_offsets": false,
|
169 |
-
"use_regex": true
|
170 |
-
},
|
171 |
-
{
|
172 |
-
"type": "TemplateProcessing",
|
173 |
-
"single": [
|
174 |
-
{
|
175 |
-
"SpecialToken": {
|
176 |
-
"id": "<|begin_of_text|>",
|
177 |
-
"type_id": 0
|
178 |
-
}
|
179 |
-
},
|
180 |
-
{
|
181 |
-
"Sequence": {
|
182 |
-
"id": "A",
|
183 |
-
"type_id": 0
|
184 |
-
}
|
185 |
-
}
|
186 |
-
],
|
187 |
-
"pair": [
|
188 |
-
{
|
189 |
-
"SpecialToken": {
|
190 |
-
"id": "<|begin_of_text|>",
|
191 |
-
"type_id": 0
|
192 |
-
}
|
193 |
-
},
|
194 |
-
{
|
195 |
-
"Sequence": {
|
196 |
-
"id": "A",
|
197 |
-
"type_id": 0
|
198 |
-
}
|
199 |
-
},
|
200 |
-
{
|
201 |
-
"SpecialToken": {
|
202 |
-
"id": "<|begin_of_text|>",
|
203 |
-
"type_id": 0
|
204 |
-
}
|
205 |
-
},
|
206 |
-
{
|
207 |
-
"Sequence": {
|
208 |
-
"id": "B",
|
209 |
-
"type_id": 0
|
210 |
-
}
|
211 |
-
}
|
212 |
-
],
|
213 |
-
"special_tokens": {
|
214 |
-
"<|begin_of_text|>": {
|
215 |
-
"id": "<|begin_of_text|>",
|
216 |
-
"ids": [
|
217 |
-
1
|
218 |
-
],
|
219 |
-
"tokens": [
|
220 |
-
"<|begin_of_text|>"
|
221 |
-
]
|
222 |
-
},
|
223 |
-
"<|end_of_text|>": {
|
224 |
-
"id": "<|end_of_text|>",
|
225 |
-
"ids": [
|
226 |
-
2
|
227 |
-
],
|
228 |
-
"tokens": [
|
229 |
-
"<|end_of_text|>"
|
230 |
-
]
|
231 |
-
},
|
232 |
-
"[PAD]": {
|
233 |
-
"id": "[PAD]",
|
234 |
-
"ids": [
|
235 |
-
3
|
236 |
-
],
|
237 |
-
"tokens": [
|
238 |
-
"[PAD]"
|
239 |
-
]
|
240 |
-
},
|
241 |
-
"[UNK]": {
|
242 |
-
"id": "[UNK]",
|
243 |
-
"ids": [
|
244 |
-
0
|
245 |
-
],
|
246 |
-
"tokens": [
|
247 |
-
"[UNK]"
|
248 |
-
]
|
249 |
-
}
|
250 |
-
}
|
251 |
-
}
|
252 |
-
]
|
253 |
-
},
|
254 |
"decoder": {
|
255 |
"type": "ByteLevel",
|
256 |
"add_prefix_space": true,
|
@@ -131064,4 +130972,4 @@
|
|
131064 |
"ĠÏĮ ÏĦαν"
|
131065 |
]
|
131066 |
}
|
131067 |
-
}
|
|
|
159 |
}
|
160 |
]
|
161 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
"decoder": {
|
163 |
"type": "ByteLevel",
|
164 |
"add_prefix_space": true,
|
|
|
130972 |
"ĠÏĮ ÏĦαν"
|
130973 |
]
|
130974 |
}
|
130975 |
+
}
|