social2468media committed on
Commit
f35b0d9
1 Parent(s): 9df60f8

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +21 -3
  2. tokenizer.json +4 -4
special_tokens_map.json CHANGED
@@ -101,7 +101,25 @@
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  }
 
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
  }
tokenizer.json CHANGED
@@ -950,8 +950,8 @@
950
  "pre_tokenizer": {
951
  "type": "Metaspace",
952
  "replacement": "▁",
953
- "add_prefix_space": true,
954
- "prepend_scheme": "always"
955
  },
956
  "post_processor": {
957
  "type": "TemplateProcessing",
@@ -1010,8 +1010,8 @@
1010
  "decoder": {
1011
  "type": "Metaspace",
1012
  "replacement": "▁",
1013
- "add_prefix_space": true,
1014
- "prepend_scheme": "always"
1015
  },
1016
  "model": {
1017
  "type": "Unigram",
 
950
  "pre_tokenizer": {
951
  "type": "Metaspace",
952
  "replacement": "▁",
953
+ "prepend_scheme": "always",
954
+ "split": true
955
  },
956
  "post_processor": {
957
  "type": "TemplateProcessing",
 
1010
  "decoder": {
1011
  "type": "Metaspace",
1012
  "replacement": "▁",
1013
+ "prepend_scheme": "always",
1014
+ "split": true
1015
  },
1016
  "model": {
1017
  "type": "Unigram",