social2468media
committed on
Commit
•
f35b0d9
1
Parent(s):
9df60f8
Upload tokenizer
Browse files
- special_tokens_map.json +21 -3
- tokenizer.json +4 -4
special_tokens_map.json
CHANGED
@@ -101,7 +101,25 @@
|
|
101 |
"<extra_id_98>",
|
102 |
"<extra_id_99>"
|
103 |
],
|
104 |
-
"eos_token":
|
105 |
-
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
}
|
|
|
101 |
"<extra_id_98>",
|
102 |
"<extra_id_99>"
|
103 |
],
|
104 |
+
"eos_token": {
|
105 |
+
"content": "</s>",
|
106 |
+
"lstrip": false,
|
107 |
+
"normalized": false,
|
108 |
+
"rstrip": false,
|
109 |
+
"single_word": false
|
110 |
+
},
|
111 |
+
"pad_token": {
|
112 |
+
"content": "<pad>",
|
113 |
+
"lstrip": false,
|
114 |
+
"normalized": false,
|
115 |
+
"rstrip": false,
|
116 |
+
"single_word": false
|
117 |
+
},
|
118 |
+
"unk_token": {
|
119 |
+
"content": "<unk>",
|
120 |
+
"lstrip": false,
|
121 |
+
"normalized": false,
|
122 |
+
"rstrip": false,
|
123 |
+
"single_word": false
|
124 |
+
}
|
125 |
}
|
tokenizer.json
CHANGED
@@ -950,8 +950,8 @@
|
|
950 |
"pre_tokenizer": {
|
951 |
"type": "Metaspace",
|
952 |
"replacement": "▁",
|
953 |
-
"
|
954 |
-
"
|
955 |
},
|
956 |
"post_processor": {
|
957 |
"type": "TemplateProcessing",
|
@@ -1010,8 +1010,8 @@
|
|
1010 |
"decoder": {
|
1011 |
"type": "Metaspace",
|
1012 |
"replacement": "▁",
|
1013 |
-
"
|
1014 |
-
"
|
1015 |
},
|
1016 |
"model": {
|
1017 |
"type": "Unigram",
|
|
|
950 |
"pre_tokenizer": {
|
951 |
"type": "Metaspace",
|
952 |
"replacement": "▁",
|
953 |
+
"prepend_scheme": "always",
|
954 |
+
"split": true
|
955 |
},
|
956 |
"post_processor": {
|
957 |
"type": "TemplateProcessing",
|
|
|
1010 |
"decoder": {
|
1011 |
"type": "Metaspace",
|
1012 |
"replacement": "▁",
|
1013 |
+
"prepend_scheme": "always",
|
1014 |
+
"split": true
|
1015 |
},
|
1016 |
"model": {
|
1017 |
"type": "Unigram",
|