Isaak Carter Augustus committed on
Commit
bf2b9a2
1 Parent(s): e0890fe

Upload tokenizer

Browse files
Files changed (4) hide show
  1. added_tokens.json +21 -17
  2. special_tokens_map.json +28 -0
  3. tokenizer.json +52 -16
  4. tokenizer_config.json +53 -17
added_tokens.json CHANGED
@@ -1,22 +1,26 @@
1
  {
2
- "<|assistant|>": 32006,
 
 
3
  "<|endoftext|>": 32001,
4
- "<|function_call|>": 32007,
5
- "<|function_response|>": 32008,
6
  "<|functions|>": 32002,
7
- "<|gökdeniz|>": 32003,
8
- "<|home_state|>": 32011,
9
- "<|image|>": 32009,
10
- "<|josie|>": 32005,
11
- "<|long_term_memory|>": 32010,
 
12
  "<|startoftext|>": 32000,
13
- "<|user|>": 32004,
14
- "Gökdeniz": 32013,
15
- "Gökdeniz Gülmez": 32012,
16
- "Gülmez": 32014,
17
- "J.O.S.I.E.": 32016,
18
- "JOSIE": 32015,
19
- "Josie": 32017,
20
- "Just an Outstandingly Smart and Intelligent Entity": 32019,
21
- "josie": 32018
 
22
  }
 
1
  {
2
+ "<|assistant|>": 32007,
3
+ "<|context|>": 32015,
4
+ "<|current_states|>": 32014,
5
  "<|endoftext|>": 32001,
6
+ "<|function_call|>": 32008,
7
+ "<|function_response|>": 32009,
8
  "<|functions|>": 32002,
9
+ "<|gökdeniz|>": 32004,
10
+ "<|home_state|>": 32013,
11
+ "<|image|>": 32010,
12
+ "<|josie|>": 32006,
13
+ "<|long_term_memory|>": 32011,
14
+ "<|short_term_memory|>": 32012,
15
  "<|startoftext|>": 32000,
16
+ "<|system|>": 32003,
17
+ "<|user|>": 32005,
18
+ "Gökdeniz": 32017,
19
+ "Gökdeniz Gülmez": 32016,
20
+ "Gülmez": 32018,
21
+ "J.O.S.I.E.": 32020,
22
+ "JOSIE": 32019,
23
+ "Josie": 32021,
24
+ "Just an Outstandingly Smart and Intelligent Entity": 32023,
25
+ "josie": 32022
26
  }
special_tokens_map.json CHANGED
@@ -7,6 +7,13 @@
7
  "rstrip": false,
8
  "single_word": false
9
  },
 
 
 
 
 
 
 
10
  {
11
  "content": "<|gökdeniz|>",
12
  "lstrip": false,
@@ -63,12 +70,33 @@
63
  "rstrip": false,
64
  "single_word": false
65
  },
 
 
 
 
 
 
 
66
  {
67
  "content": "<|home_state|>",
68
  "lstrip": false,
69
  "normalized": false,
70
  "rstrip": false,
71
  "single_word": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  }
73
  ],
74
  "bos_token": {
 
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
+ {
11
+ "content": "<|system|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
  {
18
  "content": "<|gökdeniz|>",
19
  "lstrip": false,
 
70
  "rstrip": false,
71
  "single_word": false
72
  },
73
+ {
74
+ "content": "<|short_term_memory|>",
75
+ "lstrip": false,
76
+ "normalized": false,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
  {
81
  "content": "<|home_state|>",
82
  "lstrip": false,
83
  "normalized": false,
84
  "rstrip": false,
85
  "single_word": false
86
+ },
87
+ {
88
+ "content": "<|current_states|>",
89
+ "lstrip": false,
90
+ "normalized": false,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "<|context|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false
100
  }
101
  ],
102
  "bos_token": {
tokenizer.json CHANGED
@@ -59,7 +59,7 @@
59
  },
60
  {
61
  "id": 32003,
62
- "content": "<|gökdeniz|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
@@ -68,7 +68,7 @@
68
  },
69
  {
70
  "id": 32004,
71
- "content": "<|user|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
@@ -77,7 +77,7 @@
77
  },
78
  {
79
  "id": 32005,
80
- "content": "<|josie|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
@@ -86,7 +86,7 @@
86
  },
87
  {
88
  "id": 32006,
89
- "content": "<|assistant|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
@@ -95,7 +95,7 @@
95
  },
96
  {
97
  "id": 32007,
98
- "content": "<|function_call|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
@@ -104,7 +104,7 @@
104
  },
105
  {
106
  "id": 32008,
107
- "content": "<|function_response|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  },
114
  {
115
  "id": 32009,
116
- "content": "<|image|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
@@ -122,7 +122,7 @@
122
  },
123
  {
124
  "id": 32010,
125
- "content": "<|long_term_memory|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
@@ -131,7 +131,7 @@
131
  },
132
  {
133
  "id": 32011,
134
- "content": "<|home_state|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
@@ -140,6 +140,42 @@
140
  },
141
  {
142
  "id": 32012,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  "content": "Gökdeniz Gülmez",
144
  "single_word": false,
145
  "lstrip": false,
@@ -148,7 +184,7 @@
148
  "special": false
149
  },
150
  {
151
- "id": 32013,
152
  "content": "Gökdeniz",
153
  "single_word": false,
154
  "lstrip": false,
@@ -157,7 +193,7 @@
157
  "special": false
158
  },
159
  {
160
- "id": 32014,
161
  "content": "Gülmez",
162
  "single_word": false,
163
  "lstrip": false,
@@ -166,7 +202,7 @@
166
  "special": false
167
  },
168
  {
169
- "id": 32015,
170
  "content": "JOSIE",
171
  "single_word": false,
172
  "lstrip": false,
@@ -175,7 +211,7 @@
175
  "special": false
176
  },
177
  {
178
- "id": 32016,
179
  "content": "J.O.S.I.E.",
180
  "single_word": false,
181
  "lstrip": false,
@@ -184,7 +220,7 @@
184
  "special": false
185
  },
186
  {
187
- "id": 32017,
188
  "content": "Josie",
189
  "single_word": false,
190
  "lstrip": false,
@@ -193,7 +229,7 @@
193
  "special": false
194
  },
195
  {
196
- "id": 32018,
197
  "content": "josie",
198
  "single_word": false,
199
  "lstrip": false,
@@ -202,7 +238,7 @@
202
  "special": false
203
  },
204
  {
205
- "id": 32019,
206
  "content": "Just an Outstandingly Smart and Intelligent Entity",
207
  "single_word": false,
208
  "lstrip": false,
 
59
  },
60
  {
61
  "id": 32003,
62
+ "content": "<|system|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
 
68
  },
69
  {
70
  "id": 32004,
71
+ "content": "<|gökdeniz|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
 
77
  },
78
  {
79
  "id": 32005,
80
+ "content": "<|user|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
 
86
  },
87
  {
88
  "id": 32006,
89
+ "content": "<|josie|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
 
95
  },
96
  {
97
  "id": 32007,
98
+ "content": "<|assistant|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
 
104
  },
105
  {
106
  "id": 32008,
107
+ "content": "<|function_call|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
 
113
  },
114
  {
115
  "id": 32009,
116
+ "content": "<|function_response|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
 
122
  },
123
  {
124
  "id": 32010,
125
+ "content": "<|image|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
 
131
  },
132
  {
133
  "id": 32011,
134
+ "content": "<|long_term_memory|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
 
140
  },
141
  {
142
  "id": 32012,
143
+ "content": "<|short_term_memory|>",
144
+ "single_word": false,
145
+ "lstrip": false,
146
+ "rstrip": false,
147
+ "normalized": false,
148
+ "special": true
149
+ },
150
+ {
151
+ "id": 32013,
152
+ "content": "<|home_state|>",
153
+ "single_word": false,
154
+ "lstrip": false,
155
+ "rstrip": false,
156
+ "normalized": false,
157
+ "special": true
158
+ },
159
+ {
160
+ "id": 32014,
161
+ "content": "<|current_states|>",
162
+ "single_word": false,
163
+ "lstrip": false,
164
+ "rstrip": false,
165
+ "normalized": false,
166
+ "special": true
167
+ },
168
+ {
169
+ "id": 32015,
170
+ "content": "<|context|>",
171
+ "single_word": false,
172
+ "lstrip": false,
173
+ "rstrip": false,
174
+ "normalized": false,
175
+ "special": true
176
+ },
177
+ {
178
+ "id": 32016,
179
  "content": "Gökdeniz Gülmez",
180
  "single_word": false,
181
  "lstrip": false,
 
184
  "special": false
185
  },
186
  {
187
+ "id": 32017,
188
  "content": "Gökdeniz",
189
  "single_word": false,
190
  "lstrip": false,
 
193
  "special": false
194
  },
195
  {
196
+ "id": 32018,
197
  "content": "Gülmez",
198
  "single_word": false,
199
  "lstrip": false,
 
202
  "special": false
203
  },
204
  {
205
+ "id": 32019,
206
  "content": "JOSIE",
207
  "single_word": false,
208
  "lstrip": false,
 
211
  "special": false
212
  },
213
  {
214
+ "id": 32020,
215
  "content": "J.O.S.I.E.",
216
  "single_word": false,
217
  "lstrip": false,
 
220
  "special": false
221
  },
222
  {
223
+ "id": 32021,
224
  "content": "Josie",
225
  "single_word": false,
226
  "lstrip": false,
 
229
  "special": false
230
  },
231
  {
232
+ "id": 32022,
233
  "content": "josie",
234
  "single_word": false,
235
  "lstrip": false,
 
238
  "special": false
239
  },
240
  {
241
+ "id": 32023,
242
  "content": "Just an Outstandingly Smart and Intelligent Entity",
243
  "single_word": false,
244
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -51,7 +51,7 @@
51
  "special": true
52
  },
53
  "32003": {
54
- "content": "<|gökdeniz|>",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
@@ -59,7 +59,7 @@
59
  "special": true
60
  },
61
  "32004": {
62
- "content": "<|user|>",
63
  "lstrip": false,
64
  "normalized": false,
65
  "rstrip": false,
@@ -67,7 +67,7 @@
67
  "special": true
68
  },
69
  "32005": {
70
- "content": "<|josie|>",
71
  "lstrip": false,
72
  "normalized": false,
73
  "rstrip": false,
@@ -75,7 +75,7 @@
75
  "special": true
76
  },
77
  "32006": {
78
- "content": "<|assistant|>",
79
  "lstrip": false,
80
  "normalized": false,
81
  "rstrip": false,
@@ -83,7 +83,7 @@
83
  "special": true
84
  },
85
  "32007": {
86
- "content": "<|function_call|>",
87
  "lstrip": false,
88
  "normalized": false,
89
  "rstrip": false,
@@ -91,7 +91,7 @@
91
  "special": true
92
  },
93
  "32008": {
94
- "content": "<|function_response|>",
95
  "lstrip": false,
96
  "normalized": false,
97
  "rstrip": false,
@@ -99,7 +99,7 @@
99
  "special": true
100
  },
101
  "32009": {
102
- "content": "<|image|>",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
@@ -107,7 +107,7 @@
107
  "special": true
108
  },
109
  "32010": {
110
- "content": "<|long_term_memory|>",
111
  "lstrip": false,
112
  "normalized": false,
113
  "rstrip": false,
@@ -115,7 +115,7 @@
115
  "special": true
116
  },
117
  "32011": {
118
- "content": "<|home_state|>",
119
  "lstrip": false,
120
  "normalized": false,
121
  "rstrip": false,
@@ -123,6 +123,38 @@
123
  "special": true
124
  },
125
  "32012": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  "content": "Gökdeniz Gülmez",
127
  "lstrip": false,
128
  "normalized": true,
@@ -130,7 +162,7 @@
130
  "single_word": false,
131
  "special": false
132
  },
133
- "32013": {
134
  "content": "Gökdeniz",
135
  "lstrip": false,
136
  "normalized": true,
@@ -138,7 +170,7 @@
138
  "single_word": false,
139
  "special": false
140
  },
141
- "32014": {
142
  "content": "Gülmez",
143
  "lstrip": false,
144
  "normalized": true,
@@ -146,7 +178,7 @@
146
  "single_word": false,
147
  "special": false
148
  },
149
- "32015": {
150
  "content": "JOSIE",
151
  "lstrip": false,
152
  "normalized": true,
@@ -154,7 +186,7 @@
154
  "single_word": false,
155
  "special": false
156
  },
157
- "32016": {
158
  "content": "J.O.S.I.E.",
159
  "lstrip": false,
160
  "normalized": true,
@@ -162,7 +194,7 @@
162
  "single_word": false,
163
  "special": false
164
  },
165
- "32017": {
166
  "content": "Josie",
167
  "lstrip": false,
168
  "normalized": true,
@@ -170,7 +202,7 @@
170
  "single_word": false,
171
  "special": false
172
  },
173
- "32018": {
174
  "content": "josie",
175
  "lstrip": false,
176
  "normalized": true,
@@ -178,7 +210,7 @@
178
  "single_word": false,
179
  "special": false
180
  },
181
- "32019": {
182
  "content": "Just an Outstandingly Smart and Intelligent Entity",
183
  "lstrip": false,
184
  "normalized": true,
@@ -189,6 +221,7 @@
189
  },
190
  "additional_special_tokens": [
191
  "<|functions|>",
 
192
  "<|gökdeniz|>",
193
  "<|user|>",
194
  "<|josie|>",
@@ -197,7 +230,10 @@
197
  "<|function_response|>",
198
  "<|image|>",
199
  "<|long_term_memory|>",
200
- "<|home_state|>"
 
 
 
201
  ],
202
  "bos_token": "<|startoftext|>",
203
  "clean_up_tokenization_spaces": false,
 
51
  "special": true
52
  },
53
  "32003": {
54
+ "content": "<|system|>",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
 
59
  "special": true
60
  },
61
  "32004": {
62
+ "content": "<|gökdeniz|>",
63
  "lstrip": false,
64
  "normalized": false,
65
  "rstrip": false,
 
67
  "special": true
68
  },
69
  "32005": {
70
+ "content": "<|user|>",
71
  "lstrip": false,
72
  "normalized": false,
73
  "rstrip": false,
 
75
  "special": true
76
  },
77
  "32006": {
78
+ "content": "<|josie|>",
79
  "lstrip": false,
80
  "normalized": false,
81
  "rstrip": false,
 
83
  "special": true
84
  },
85
  "32007": {
86
+ "content": "<|assistant|>",
87
  "lstrip": false,
88
  "normalized": false,
89
  "rstrip": false,
 
91
  "special": true
92
  },
93
  "32008": {
94
+ "content": "<|function_call|>",
95
  "lstrip": false,
96
  "normalized": false,
97
  "rstrip": false,
 
99
  "special": true
100
  },
101
  "32009": {
102
+ "content": "<|function_response|>",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
 
107
  "special": true
108
  },
109
  "32010": {
110
+ "content": "<|image|>",
111
  "lstrip": false,
112
  "normalized": false,
113
  "rstrip": false,
 
115
  "special": true
116
  },
117
  "32011": {
118
+ "content": "<|long_term_memory|>",
119
  "lstrip": false,
120
  "normalized": false,
121
  "rstrip": false,
 
123
  "special": true
124
  },
125
  "32012": {
126
+ "content": "<|short_term_memory|>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": true
132
+ },
133
+ "32013": {
134
+ "content": "<|home_state|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": true
140
+ },
141
+ "32014": {
142
+ "content": "<|current_states|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": true
148
+ },
149
+ "32015": {
150
+ "content": "<|context|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": true
156
+ },
157
+ "32016": {
158
  "content": "Gökdeniz Gülmez",
159
  "lstrip": false,
160
  "normalized": true,
 
162
  "single_word": false,
163
  "special": false
164
  },
165
+ "32017": {
166
  "content": "Gökdeniz",
167
  "lstrip": false,
168
  "normalized": true,
 
170
  "single_word": false,
171
  "special": false
172
  },
173
+ "32018": {
174
  "content": "Gülmez",
175
  "lstrip": false,
176
  "normalized": true,
 
178
  "single_word": false,
179
  "special": false
180
  },
181
+ "32019": {
182
  "content": "JOSIE",
183
  "lstrip": false,
184
  "normalized": true,
 
186
  "single_word": false,
187
  "special": false
188
  },
189
+ "32020": {
190
  "content": "J.O.S.I.E.",
191
  "lstrip": false,
192
  "normalized": true,
 
194
  "single_word": false,
195
  "special": false
196
  },
197
+ "32021": {
198
  "content": "Josie",
199
  "lstrip": false,
200
  "normalized": true,
 
202
  "single_word": false,
203
  "special": false
204
  },
205
+ "32022": {
206
  "content": "josie",
207
  "lstrip": false,
208
  "normalized": true,
 
210
  "single_word": false,
211
  "special": false
212
  },
213
+ "32023": {
214
  "content": "Just an Outstandingly Smart and Intelligent Entity",
215
  "lstrip": false,
216
  "normalized": true,
 
221
  },
222
  "additional_special_tokens": [
223
  "<|functions|>",
224
+ "<|system|>",
225
  "<|gökdeniz|>",
226
  "<|user|>",
227
  "<|josie|>",
 
230
  "<|function_response|>",
231
  "<|image|>",
232
  "<|long_term_memory|>",
233
+ "<|short_term_memory|>",
234
+ "<|home_state|>",
235
+ "<|current_states|>",
236
+ "<|context|>"
237
  ],
238
  "bos_token": "<|startoftext|>",
239
  "clean_up_tokenization_spaces": false,