Pclanglais committed on
Commit
90afd97
·
verified ·
1 Parent(s): 8788915

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +1 -93
tokenizer.json CHANGED
@@ -159,98 +159,6 @@
159
  }
160
  ]
161
  },
162
- "post_processor": {
163
- "type": "Sequence",
164
- "processors": [
165
- {
166
- "type": "ByteLevel",
167
- "add_prefix_space": true,
168
- "trim_offsets": false,
169
- "use_regex": true
170
- },
171
- {
172
- "type": "TemplateProcessing",
173
- "single": [
174
- {
175
- "SpecialToken": {
176
- "id": "<|begin_of_text|>",
177
- "type_id": 0
178
- }
179
- },
180
- {
181
- "Sequence": {
182
- "id": "A",
183
- "type_id": 0
184
- }
185
- }
186
- ],
187
- "pair": [
188
- {
189
- "SpecialToken": {
190
- "id": "<|begin_of_text|>",
191
- "type_id": 0
192
- }
193
- },
194
- {
195
- "Sequence": {
196
- "id": "A",
197
- "type_id": 0
198
- }
199
- },
200
- {
201
- "SpecialToken": {
202
- "id": "<|begin_of_text|>",
203
- "type_id": 0
204
- }
205
- },
206
- {
207
- "Sequence": {
208
- "id": "B",
209
- "type_id": 0
210
- }
211
- }
212
- ],
213
- "special_tokens": {
214
- "<|begin_of_text|>": {
215
- "id": "<|begin_of_text|>",
216
- "ids": [
217
- 1
218
- ],
219
- "tokens": [
220
- "<|begin_of_text|>"
221
- ]
222
- },
223
- "<|end_of_text|>": {
224
- "id": "<|end_of_text|>",
225
- "ids": [
226
- 2
227
- ],
228
- "tokens": [
229
- "<|end_of_text|>"
230
- ]
231
- },
232
- "[PAD]": {
233
- "id": "[PAD]",
234
- "ids": [
235
- 3
236
- ],
237
- "tokens": [
238
- "[PAD]"
239
- ]
240
- },
241
- "[UNK]": {
242
- "id": "[UNK]",
243
- "ids": [
244
- 0
245
- ],
246
- "tokens": [
247
- "[UNK]"
248
- ]
249
- }
250
- }
251
- }
252
- ]
253
- },
254
  "decoder": {
255
  "type": "ByteLevel",
256
  "add_prefix_space": true,
@@ -131064,4 +130972,4 @@
131064
  "ĠÏĮ ÏĦαν"
131065
  ]
131066
  }
131067
- }
 
159
  }
160
  ]
161
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  "decoder": {
163
  "type": "ByteLevel",
164
  "add_prefix_space": true,
 
130972
  "ĠÏĮ ÏĦαν"
130973
  ]
130974
  }
130975
+ }