bennny674 committed
Commit 5c7b473 · verified · 1 Parent(s): 8983b82

Upload 33 files

config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "_name_or_path": "HuggingFaceTB/SmolLM2-360M",
+   "architectures": [
+     "MixtralForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "eos_token_id": 0,
+   "head_dim": 64,
+   "hidden_act": "silu",
+   "hidden_size": 960,
+   "initializer_range": 0.02,
+   "intermediate_size": 2560,
+   "is_llama_config": true,
+   "max_position_embeddings": 8192,
+   "mlp_bias": false,
+   "model_type": "mixtral",
+   "num_attention_heads": 15,
+   "num_experts_per_tok": 8,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 5,
+   "num_local_experts": 256,
+   "output_router_logits": false,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_interleaved": false,
+   "rope_scaling": null,
+   "rope_theta": 100000,
+   "router_aux_loss_coef": 0.001,
+   "router_jitter_noise": 0.0,
+   "sliding_window": null,
+   "tie_word_embeddings": true,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.47.1",
+   "use_cache": true,
+   "vocab_size": 49152
+ }
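Taken together, these fields describe a Mixtral-style MoE built from SmolLM2-360M: 32 layers, each with 256 local experts of which 8 are routed per token. A back-of-the-envelope sketch, assuming the standard Mixtral parameter layout (q/k/v/o projections, one router per layer, w1/w2/w3 per expert) and using only the numbers in this config:

```python
# Rough parameter-count estimate for the config above (a sketch, not the
# repo's own code; assumes the standard Mixtral weight layout).

hidden, inter, layers = 960, 2560, 32
heads, kv_heads, head_dim = 15, 5, 64
experts, topk, vocab = 256, 8, 49152

attn = hidden * (heads * head_dim) * 2 + hidden * (kv_heads * head_dim) * 2  # q,o + k,v
expert_mlp = 3 * hidden * inter          # w1, w2, w3 of a single expert
router = hidden * experts                # per-layer gate
norms = 2 * hidden                       # input + post-attention RMSNorm

per_layer_total = attn + router + experts * expert_mlp + norms
per_layer_active = attn + router + topk * expert_mlp + norms

embed = vocab * hidden                   # tie_word_embeddings, counted once
total = layers * per_layer_total + embed + hidden    # + final norm
active = layers * per_layer_active + embed + hidden

print(f"total:  {total / 1e9:.1f}B params (~{total * 2 / 1e9:.0f} GB in bf16)")
print(f"active: {active / 1e9:.2f}B params per token")
```

This gives roughly 60.5B total parameters (~121 GB in bfloat16) with only ~2B active per token, which matches the summed sizes of the 25 safetensors shards below to within a fraction of a percent (the remainder being safetensors headers and metadata).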
mergekit_moe_config.yml ADDED
@@ -0,0 +1,263 @@
+ base_model: HuggingFaceTB/SmolLM2-360M
+ gate_mode: random # one of "hidden", "cheap_embed", or "random"
+ dtype: bfloat16 # output dtype (float32, float16, or bfloat16)
+ ## (optional)
+ experts_per_token: 8
+ experts:
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+ - source_model: HuggingFaceTB/SmolLM2-360M
+
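Because gate_mode is random, the router weights are initialized randomly and no positive_prompts are needed for the expert entries. A file like this, with 256 identical experts, is easier to generate than to write by hand; a minimal sketch (plain string formatting, no mergekit import; the output filename is just the one above) that reproduces it, after which the merge would typically be run with mergekit's `mergekit-moe mergekit_moe_config.yml <output_dir>` entry point:

```python
# Generate the repetitive mergekit MoE config above programmatically.

BASE = "HuggingFaceTB/SmolLM2-360M"
N_EXPERTS = 256  # matches num_local_experts in config.json

lines = [
    f"base_model: {BASE}",
    'gate_mode: random # one of "hidden", "cheap_embed", or "random"',
    "dtype: bfloat16 # output dtype (float32, float16, or bfloat16)",
    "## (optional)",
    "experts_per_token: 8",
    "experts:",
]
lines += [f"- source_model: {BASE}"] * N_EXPERTS

with open("mergekit_moe_config.yml", "w") as f:
    f.write("\n".join(lines) + "\n")
```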
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0001c63f90bc1fc4aea9ab4a2c098754db53fef4faa43e94b6d4d67ad20115b2
+ size 4999880296
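Each shard is stored through Git LFS, so the diff shows only the three-line pointer file: the spec version, the SHA-256 of the actual object, and its size in bytes. A quick sketch for checking a downloaded shard against its pointer (the local path is illustrative; point it at wherever the shard was downloaded):

```python
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    """Stream the file in 1 MiB chunks and return its hex SHA-256."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

# Expected digest copied from the LFS pointer above.
expected = "0001c63f90bc1fc4aea9ab4a2c098754db53fef4faa43e94b6d4d67ad20115b2"
assert sha256_of("model-00001-of-00025.safetensors") == expected
```

The same check applies to every shard below, substituting the oid from the corresponding pointer.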
model-00002-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:95818f857016b0bc90b45b213cba06a788a35436c20752294ee97caeb894589a
+ size 4998895168
model-00003-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8ba1ed5ec269f8611376cb3cf512e8f63539e323496ad41ee7f0d7ee69faf46c
+ size 4998895168
model-00004-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:994f13989c9ffef85441b8f1f89f933f318f64b0e3661612b9913b8b9df09bdc
+ size 4998899568
model-00005-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:850390ba8cf5ab8313fbb0b31ee38c7f3d75a60ab20a1313c6f5b7556cdf433e
+ size 4998895168
model-00006-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b89607d8c2673d1c59b7579f8f3fb0690a24f279d44c02a03526b454f6694a5
+ size 4998895168
model-00007-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:13bf6f42f638c56e531981009177cfc23cf52c2aa20db4c944ec3ad3f501fec5
+ size 4998899568
model-00008-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:baceb426e09d748a2ec2b264deec124d730ec8857e45c3ad3799d35744c47208
+ size 4998895600
model-00009-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:66fe25e127c38c8cfde06be2ff32d849bf8dc8dd14249134578c5f10fff6cd6d
+ size 4998896192
model-00010-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d65cb7f10445d4574223c8342bd06890f3b264fee1016f0672071bf1677509f7
+ size 4998900592
model-00011-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e30c043c1d8bbf873b5e8e2f74748122514ce6aace0251baca9091ae5149601d
+ size 4998896192
model-00012-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:583b2c1ef155f806d0b7d370ea1b3316475beba72c7001ca3537e14fde0ad146
+ size 4998896192
model-00013-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fa1c0eef359723a6333ae661bf9581a36445aec5f904681da8007c7c992d89f5
+ size 4998900592
model-00014-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9bf88f3789976076ccb313bbf1f00b3ee9b234009bbae91b1080c9718abfa15
+ size 4998896192
model-00015-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85c3feae55c90428a364030dc1d491cb66e52246df93740b2c48164a34dc16b6
+ size 4998896192
model-00016-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:133b67f48bebf504c87fc13b0c6aa08866def2db013f7cbfbe9cff7a04f2e72c
+ size 4998900592
model-00017-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58abbe76d568d81fbe97b17b09e61806bb9bfb692538e0ef04fc109c48b159cc
+ size 4998896200
model-00018-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bb4ba492003636840e2e77336303115ca11a474294ab6d45b197ec0474ff15a9
+ size 4998896200
model-00019-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99bb5119bd07fc72be346a0914c6585d88247d370e42316df6c93a2850572b01
+ size 4998900600
model-00020-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5b420db76cfbb3270501812f8d7e28816b0ac9de4f14130c3c42b4cce7f42ce7
+ size 4998896200
model-00021-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:21b204e9e1f349f8d188e65aa82cde3c62f0cc8a11976ca01389f559492a3adb
+ size 4998896200
model-00022-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6463d019a8df7c7531fcec3b1253706aa0da50081086f3721c7128d54a207d0e
+ size 4998900600
model-00023-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f32b32d723053d567e444aeae693f3c4c8224f3bca2d17a43007033da26f304
+ size 4998896200
model-00024-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:296cab440f8d6fab30c113d208c21a7438d0a63a831a9f82122a0f62b6677086
+ size 4998896200
model-00025-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:64372e44e0e9a79d14dc37b8da71f32cc4c8c418b0e1bfcfe35b79666953e840
+ size 1186563688
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "additional_special_tokens": [
+     "<|endoftext|>",
+     "<|im_start|>",
+     "<|im_end|>",
+     "<repo_name>",
+     "<reponame>",
+     "<file_sep>",
+     "<filename>",
+     "<gh_stars>",
+     "<issue_start>",
+     "<issue_comment>",
+     "<issue_closed>",
+     "<jupyter_start>",
+     "<jupyter_text>",
+     "<jupyter_code>",
+     "<jupyter_output>",
+     "<jupyter_script>",
+     "<empty_output>"
+   ],
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|endoftext|>",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,169 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<repo_name>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "<reponame>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "5": {
+       "content": "<file_sep>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "6": {
+       "content": "<filename>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "7": {
+       "content": "<gh_stars>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "8": {
+       "content": "<issue_start>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "9": {
+       "content": "<issue_comment>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "10": {
+       "content": "<issue_closed>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "11": {
+       "content": "<jupyter_start>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "12": {
+       "content": "<jupyter_text>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "13": {
+       "content": "<jupyter_code>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "14": {
+       "content": "<jupyter_output>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "15": {
+       "content": "<jupyter_script>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "16": {
+       "content": "<empty_output>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<|endoftext|>",
+     "<|im_start|>",
+     "<|im_end|>",
+     "<repo_name>",
+     "<reponame>",
+     "<file_sep>",
+     "<filename>",
+     "<gh_stars>",
+     "<issue_start>",
+     "<issue_comment>",
+     "<issue_closed>",
+     "<jupyter_start>",
+     "<jupyter_text>",
+     "<jupyter_code>",
+     "<jupyter_output>",
+     "<jupyter_script>",
+     "<empty_output>"
+   ],
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|endoftext|>",
+   "extra_special_tokens": {},
+   "model_max_length": 8192,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>",
+   "vocab_size": 49152
+ }
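The tokenizer is SmolLM2's GPT-2-style BPE carried over unchanged: <|endoftext|> serves as BOS, EOS, PAD, and UNK all at once (ids 0), and the ChatML markers <|im_start|>/<|im_end|> plus the code-corpus tags are registered as special tokens. A minimal loading sketch (the local path is an assumption; substitute the actual repo id or checkout location):

```python
from transformers import AutoTokenizer

# Load from a local checkout of this repo (path is illustrative).
tok = AutoTokenizer.from_pretrained("./smollm2-moe")

print(tok.eos_token)  # <|endoftext|>, shared with bos/pad/unk per the config above
print(tok.convert_tokens_to_ids(["<|im_start|>", "<|im_end|>"]))  # [1, 2]
```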
vocab.json ADDED
The diff for this file is too large to render. See raw diff