danielhanchen
committed on
Commit
•
032cd61
1
Parent(s):
8105139
Upload tokenizer
Browse files
- added_tokens.json +1 -0
- special_tokens_map.json +1 -7
- tokenizer.json +9 -0
- tokenizer_config.json +9 -1
added_tokens.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"<|endoftext|>": 151643,
|
3 |
"<|im_end|>": 151645,
|
4 |
"<|im_start|>": 151644
|
|
|
1 |
{
|
2 |
+
"<|PAD_TOKEN|>": 151646,
|
3 |
"<|endoftext|>": 151643,
|
4 |
"<|im_end|>": 151645,
|
5 |
"<|im_start|>": 151644
|
special_tokens_map.json
CHANGED
@@ -10,11 +10,5 @@
|
|
10 |
"rstrip": false,
|
11 |
"single_word": false
|
12 |
},
|
13 |
-
"pad_token":
|
14 |
-
"content": "<|endoftext|>",
|
15 |
-
"lstrip": false,
|
16 |
-
"normalized": false,
|
17 |
-
"rstrip": false,
|
18 |
-
"single_word": false
|
19 |
-
}
|
20 |
}
|
|
|
10 |
"rstrip": false,
|
11 |
"single_word": false
|
12 |
},
|
13 |
+
"pad_token": "<|PAD_TOKEN|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
}
|
tokenizer.json
CHANGED
@@ -29,6 +29,15 @@
|
|
29 |
"rstrip": false,
|
30 |
"normalized": false,
|
31 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
}
|
33 |
],
|
34 |
"normalizer": {
|
|
|
29 |
"rstrip": false,
|
30 |
"normalized": false,
|
31 |
"special": true
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"id": 151646,
|
35 |
+
"content": "<|PAD_TOKEN|>",
|
36 |
+
"single_word": false,
|
37 |
+
"lstrip": false,
|
38 |
+
"rstrip": false,
|
39 |
+
"normalized": false,
|
40 |
+
"special": true
|
41 |
}
|
42 |
],
|
43 |
"normalizer": {
|
tokenizer_config.json
CHANGED
@@ -24,6 +24,14 @@
|
|
24 |
"rstrip": false,
|
25 |
"single_word": false,
|
26 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
}
|
28 |
},
|
29 |
"additional_special_tokens": [
|
@@ -36,7 +44,7 @@
|
|
36 |
"eos_token": "<|endoftext|>",
|
37 |
"errors": "replace",
|
38 |
"model_max_length": 32768,
|
39 |
-
"pad_token": "<|endoftext|>",
|
40 |
"padding_side": "left",
|
41 |
"split_special_tokens": false,
|
42 |
"tokenizer_class": "Qwen2Tokenizer",
|
|
|
24 |
"rstrip": false,
|
25 |
"single_word": false,
|
26 |
"special": true
|
27 |
+
},
|
28 |
+
"151646": {
|
29 |
+
"content": "<|PAD_TOKEN|>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": false,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": true
|
35 |
}
|
36 |
},
|
37 |
"additional_special_tokens": [
|
|
|
44 |
"eos_token": "<|endoftext|>",
|
45 |
"errors": "replace",
|
46 |
"model_max_length": 32768,
|
47 |
+
"pad_token": "<|PAD_TOKEN|>",
|
48 |
"padding_side": "left",
|
49 |
"split_special_tokens": false,
|
50 |
"tokenizer_class": "Qwen2Tokenizer",
|