riczhou commited on
Commit
61f7bb0
1 Parent(s): 154e4ab

Initial commit

Browse files
logs.txt ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/135 [00:00<?, ?it/s]
1
 
 
2
  0%| | 0/135 [00:00<?, ?it/s]
3
 
 
4
  0%| | 0/135 [00:02<?, ?it/s]
5
 
 
6
  0%| | 0/135 [00:03<?, ?it/s]
7
 
 
8
  0%| | 0/135 [00:03<?, ?it/s]
9
  1%| | 1/135 [00:03<07:24, 3.32s/it]
10
 
 
11
  1%| | 1/135 [00:03<07:24, 3.32s/it]
12
 
 
13
  1%| | 1/135 [00:03<07:24, 3.32s/it]
14
  1%|▏ | 2/135 [00:03<03:13, 1.46s/it]
15
 
 
16
  1%|▏ | 2/135 [00:03<03:13, 1.46s/it]
17
 
 
18
  1%|▏ | 2/135 [00:03<03:13, 1.46s/it]
19
 
 
20
  1%|▏ | 2/135 [00:03<03:13, 1.46s/it]
21
 
 
22
  1%|▏ | 2/135 [00:03<03:13, 1.46s/it]
23
  3%|▎ | 4/135 [00:03<01:29, 1.47it/s]
24
 
 
25
  3%|▎ | 4/135 [00:03<01:29, 1.47it/s]
26
 
 
27
  3%|▎ | 4/135 [00:04<01:29, 1.47it/s]
28
 
 
29
  3%|▎ | 4/135 [00:04<01:29, 1.47it/s]
30
  4%|▎ | 5/135 [00:04<01:22, 1.57it/s]
31
 
 
32
  4%|▎ | 5/135 [00:04<01:22, 1.57it/s]
33
 
 
34
  4%|▎ | 5/135 [00:04<01:22, 1.57it/s]
35
 
 
36
  4%|▎ | 5/135 [00:04<01:22, 1.57it/s]
37
 
 
38
  4%|▎ | 5/135 [00:04<01:22, 1.57it/s]
39
  5%|▌ | 7/135 [00:04<00:56, 2.28it/s]
40
 
 
41
  5%|▌ | 7/135 [00:04<00:56, 2.28it/s]
42
 
 
43
  5%|▌ | 7/135 [00:05<00:56, 2.28it/s]
44
 
 
45
  5%|▌ | 7/135 [00:05<00:56, 2.28it/s]
46
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
47
 
 
48
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
49
 
 
50
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
51
 
 
52
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
53
 
 
54
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
55
 
 
56
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
57
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
58
 
 
59
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
60
 
 
61
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
62
 
 
63
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
64
 
 
65
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
66
 
 
67
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
68
 
 
69
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
70
 
 
71
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
72
 
 
73
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
74
 
 
75
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
76
 
 
77
  8%|��� | 11/135 [00:05<00:28, 4.32it/s]
78
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
79
 
 
80
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
81
 
 
82
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
83
 
 
84
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
85
 
 
86
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
87
 
 
88
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
89
 
 
90
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
91
 
 
92
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
93
 
 
94
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
95
 
 
96
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
97
 
 
98
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
99
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
100
 
 
101
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
102
 
 
103
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
104
 
 
105
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
106
 
 
107
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
108
 
 
109
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
110
 
 
111
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
112
 
 
113
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
114
 
 
115
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
116
 
 
117
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
118
 
 
119
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
120
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
121
 
 
122
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
123
 
 
124
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
125
 
 
126
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
127
 
 
128
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
129
 
 
130
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
131
 
 
132
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
133
 
 
134
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
135
 
 
136
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
137
 
 
138
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
139
 
 
140
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
141
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
142
 
 
143
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
144
 
 
145
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
146
 
 
147
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
148
 
 
149
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
150
 
 
151
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
152
 
 
153
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
154
 
 
155
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
156
 
 
157
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
158
 
 
159
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
160
 
 
161
  26%|██▌ | 35/135 [00:06<00:03, 26.60it/s]
162
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
163
 
 
164
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
165
 
 
166
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
167
 
 
168
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
169
 
 
170
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
171
 
 
172
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
173
 
 
174
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
175
 
 
176
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
177
 
 
178
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
179
 
 
180
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
181
 
 
182
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
183
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
184
 
 
185
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
186
 
 
187
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
188
 
 
189
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
190
 
 
191
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
192
 
 
193
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
194
 
 
195
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
196
 
 
197
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
198
 
 
199
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
200
 
 
201
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
202
 
 
203
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
204
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
205
 
 
206
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
207
 
 
208
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
209
 
 
210
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
211
 
 
212
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
213
 
 
214
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
215
 
 
216
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
217
 
 
218
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
219
 
 
220
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
221
 
 
222
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
223
 
 
224
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
225
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
226
 
 
227
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
228
 
 
229
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
230
 
 
231
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
232
 
 
233
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
234
 
 
235
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
236
 
 
237
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
238
 
 
239
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
240
 
 
241
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
242
 
 
243
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
244
 
 
245
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
246
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
247
 
 
248
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
249
 
 
250
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
251
 
 
252
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
253
 
 
254
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
255
 
 
256
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
257
 
 
258
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
259
 
 
260
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
261
 
 
262
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
263
 
 
264
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
265
 
 
266
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
267
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
268
 
 
269
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
270
 
 
271
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
272
 
 
273
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
274
 
 
275
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
276
 
 
277
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
278
 
 
279
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
280
 
 
281
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
282
 
 
283
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
284
 
 
285
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
286
 
 
287
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
288
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
289
 
 
290
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
291
 
 
292
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
293
 
 
294
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
295
 
 
296
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
297
 
 
298
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
299
 
 
300
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
301
 
 
302
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
303
 
 
304
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
305
 
 
306
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
307
 
 
308
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
309
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
310
 
 
311
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
312
 
 
313
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
314
 
 
315
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
316
 
 
317
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
318
 
 
319
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
320
 
 
321
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
322
 
 
323
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
324
 
 
325
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
326
 
 
327
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
328
 
 
329
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
330
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
331
 
 
332
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
333
 
 
334
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
335
 
 
336
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
337
 
 
338
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
339
 
 
340
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
341
 
 
342
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
343
 
 
344
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
345
 
 
346
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
347
 
 
348
  66%|██████▌ | 89/135 [00:07<00:00, 49.41it/s]
349
 
 
350
  66%|██████▌ | 89/135 [00:07<00:00, 49.41it/s]
351
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
352
 
 
353
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
354
 
 
355
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
356
 
 
357
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
358
 
 
359
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
360
 
 
361
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
362
 
 
363
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
364
 
 
365
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
366
 
 
367
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
368
 
 
369
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
370
 
 
371
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
372
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
373
 
 
374
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
375
 
 
376
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
377
 
 
378
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
379
 
 
380
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
381
 
 
382
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
383
 
 
384
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
385
 
 
386
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
387
 
 
388
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
389
 
 
390
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
391
 
 
392
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
393
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
394
 
 
395
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
396
 
 
397
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
398
 
 
399
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
400
 
 
401
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
402
 
 
403
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
404
 
 
405
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
406
 
 
407
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
408
 
 
409
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
410
 
 
411
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
412
 
 
413
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
414
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
415
 
 
416
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
417
 
 
418
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
419
 
 
420
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
421
 
 
422
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
423
 
 
424
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
425
 
 
426
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
427
 
 
428
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
429
 
 
430
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
431
 
 
432
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
433
 
 
434
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
435
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
436
 
 
437
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
438
 
 
439
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
440
 
 
441
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
442
 
 
443
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
444
 
 
445
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
446
 
 
447
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
448
 
 
449
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
450
 
 
451
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
452
 
 
453
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
454
 
 
455
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
456
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
457
 
 
458
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
459
 
 
460
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
461
 
 
462
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
463
 
 
464
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
465
 
 
466
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
467
 
 
468
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
469
 
 
470
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
471
 
 
472
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
473
 
 
474
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
475
 
 
476
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
477
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
478
 
 
479
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
480
 
 
481
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
482
 
 
483
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
484
 
 
485
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
486
 
 
487
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
488
 
 
489
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
 
 
 
 
 
 
 
 
 
 
 
1
+ /opt/conda/envs/py310/bin/python -m mlc_llm gen_config /models/TinyLlama-1.1B-Chat-v1.0 --quantization q4f32_1 --conv-template chatml --output /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC
2
+ [2024-06-04 20:07:17] INFO auto_config.py:116: Found model configuration: /models/TinyLlama-1.1B-Chat-v1.0/config.json
3
+ [2024-06-04 20:07:17] INFO auto_config.py:154: Found model type: llama. Use `--model-type` to override.
4
+ [2024-06-04 20:07:17] INFO llama_model.py:52: context_window_size not found in config.json. Falling back to max_position_embeddings (2048)
5
+ [2024-06-04 20:07:17] INFO llama_model.py:72: prefill_chunk_size defaults to 2048
6
+ [2024-06-04 20:07:17] INFO config.py:107: Overriding max_batch_size from 1 to 80
7
+ [2024-06-04 20:07:17] INFO gen_config.py:143: [generation_config.json] Setting bos_token_id: 1
8
+ [2024-06-04 20:07:17] INFO gen_config.py:143: [generation_config.json] Setting eos_token_id: 2
9
+ [2024-06-04 20:07:17] INFO gen_config.py:143: [generation_config.json] Setting pad_token_id: 0
10
+ [2024-06-04 20:07:17] INFO gen_config.py:155: Found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/tokenizer.model. Copying to /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC/tokenizer.model
11
+ [2024-06-04 20:07:17] INFO gen_config.py:155: Found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/tokenizer.json. Copying to /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC/tokenizer.json
12
+ [2024-06-04 20:07:17] INFO gen_config.py:157: Not found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/vocab.json
13
+ [2024-06-04 20:07:17] INFO gen_config.py:157: Not found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/merges.txt
14
+ [2024-06-04 20:07:17] INFO gen_config.py:157: Not found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/added_tokens.json
15
+ [2024-06-04 20:07:17] INFO gen_config.py:155: Found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/tokenizer_config.json. Copying to /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC/tokenizer_config.json
16
+ [2024-06-04 20:07:17] INFO gen_config.py:216: Detected tokenizer info: {'token_postproc_method': 'byte_fallback', 'prepend_space_in_encode': True, 'strip_space_in_decode': True}
17
+ [2024-06-04 20:07:17] INFO gen_config.py:32: [System default] Setting temperature: 1.0
18
+ [2024-06-04 20:07:17] INFO gen_config.py:32: [System default] Setting presence_penalty: 0.0
19
+ [2024-06-04 20:07:17] INFO gen_config.py:32: [System default] Setting frequency_penalty: 0.0
20
+ [2024-06-04 20:07:17] INFO gen_config.py:32: [System default] Setting repetition_penalty: 1.0
21
+ [2024-06-04 20:07:17] INFO gen_config.py:32: [System default] Setting top_p: 1.0
22
+ [2024-06-04 20:07:17] INFO gen_config.py:223: Dumping configuration file to: /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC/mlc-chat-config.json
23
+ /opt/conda/envs/py310/bin/python -m mlc_llm convert_weight /models/TinyLlama-1.1B-Chat-v1.0 --quantization q4f32_1 --output /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC
24
+ [2024-06-04 20:07:19] INFO auto_config.py:116: Found model configuration: /models/TinyLlama-1.1B-Chat-v1.0/config.json
25
+ [2024-06-04 20:07:20] INFO auto_device.py:79: Found device: cuda:0
26
+ [2024-06-04 20:07:21] INFO auto_device.py:88: Not found device: rocm:0
27
+ [2024-06-04 20:07:23] INFO auto_device.py:88: Not found device: metal:0
28
+ [2024-06-04 20:07:25] INFO auto_device.py:79: Found device: vulkan:0
29
+ [2024-06-04 20:07:25] INFO auto_device.py:79: Found device: vulkan:1
30
+ [2024-06-04 20:07:25] INFO auto_device.py:79: Found device: vulkan:2
31
+ [2024-06-04 20:07:25] INFO auto_device.py:79: Found device: vulkan:3
32
+ [2024-06-04 20:07:26] INFO auto_device.py:88: Not found device: opencl:0
33
+ [2024-06-04 20:07:26] INFO auto_device.py:35: Using device: cuda:0
34
+ [2024-06-04 20:07:26] INFO auto_weight.py:71: Finding weights in: /models/TinyLlama-1.1B-Chat-v1.0
35
+ [2024-06-04 20:07:26] INFO auto_weight.py:137: Not found Huggingface PyTorch
36
+ [2024-06-04 20:07:27] INFO auto_weight.py:161: Found source weight format: huggingface-safetensor. Source configuration: /models/TinyLlama-1.1B-Chat-v1.0/model.safetensors.index.json
37
+ [2024-06-04 20:07:27] INFO auto_weight.py:107: Using source weight configuration: /models/TinyLlama-1.1B-Chat-v1.0/model.safetensors.index.json. Use `--source` to override.
38
+ [2024-06-04 20:07:27] INFO auto_weight.py:111: Using source weight format: huggingface-safetensor. Use `--source-format` to override.
39
+ [2024-06-04 20:07:27] INFO auto_config.py:154: Found model type: llama. Use `--model-type` to override.
40
+ [2024-06-04 20:07:27] INFO llama_model.py:52: context_window_size not found in config.json. Falling back to max_position_embeddings (2048)
41
+ [2024-06-04 20:07:27] INFO llama_model.py:72: prefill_chunk_size defaults to 2048
42
+ Weight conversion with arguments:
43
+ --config /models/TinyLlama-1.1B-Chat-v1.0/config.json
44
+ --quantization GroupQuantize(name='q4f32_1', kind='group-quant', group_size=32, quantize_dtype='int4', storage_dtype='uint32', model_dtype='float32', linear_weight_layout='NK', quantize_embedding=True, quantize_final_fc=True, num_elem_per_storage=8, num_storage_per_group=4, max_int_value=7)
45
+ --model-type llama
46
+ --device cuda:0
47
+ --source /models/TinyLlama-1.1B-Chat-v1.0/model.safetensors.index.json
48
+ --source-format huggingface-safetensor
49
+ --output /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC
50
+ Start storing to cache /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC
51
+
52
  0%| | 0/135 [00:00<?, ?it/s]
53
 
54
+
55
  0%| | 0/135 [00:00<?, ?it/s]
56
 
57
+
58
  0%| | 0/135 [00:02<?, ?it/s]
59
 
60
+
61
  0%| | 0/135 [00:03<?, ?it/s]
62
 
63
+
64
  0%| | 0/135 [00:03<?, ?it/s]
65
  1%| | 1/135 [00:03<07:24, 3.32s/it]
66
 
67
+
68
  1%| | 1/135 [00:03<07:24, 3.32s/it]
69
 
70
+
71
  1%| | 1/135 [00:03<07:24, 3.32s/it]
72
  1%|▏ | 2/135 [00:03<03:13, 1.46s/it]
73
 
74
+
75
  1%|▏ | 2/135 [00:03<03:13, 1.46s/it]
76
 
77
+
78
  1%|▏ | 2/135 [00:03<03:13, 1.46s/it]
79
 
80
+
81
  1%|▏ | 2/135 [00:03<03:13, 1.46s/it]
82
 
83
+
84
  1%|▏ | 2/135 [00:03<03:13, 1.46s/it]
85
  3%|▎ | 4/135 [00:03<01:29, 1.47it/s]
86
 
87
+
88
  3%|▎ | 4/135 [00:03<01:29, 1.47it/s]
89
 
90
+
91
  3%|▎ | 4/135 [00:04<01:29, 1.47it/s]
92
 
93
+
94
  3%|▎ | 4/135 [00:04<01:29, 1.47it/s]
95
  4%|▎ | 5/135 [00:04<01:22, 1.57it/s]
96
 
97
+
98
  4%|▎ | 5/135 [00:04<01:22, 1.57it/s]
99
 
100
+
101
  4%|▎ | 5/135 [00:04<01:22, 1.57it/s]
102
 
103
+
104
  4%|▎ | 5/135 [00:04<01:22, 1.57it/s]
105
 
106
+
107
  4%|▎ | 5/135 [00:04<01:22, 1.57it/s]
108
  5%|▌ | 7/135 [00:04<00:56, 2.28it/s]
109
 
110
+
111
  5%|▌ | 7/135 [00:04<00:56, 2.28it/s]
112
 
113
+
114
  5%|▌ | 7/135 [00:05<00:56, 2.28it/s]
115
 
116
+
117
  5%|▌ | 7/135 [00:05<00:56, 2.28it/s]
118
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
119
 
120
+
121
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
122
 
123
+
124
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
125
 
126
+
127
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
128
 
129
+
130
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
131
 
132
+
133
  6%|▌ | 8/135 [00:05<00:55, 2.30it/s]
134
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
135
 
136
+
137
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
138
 
139
+
140
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
141
 
142
+
143
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
144
 
145
+
146
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
147
 
148
+
149
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
150
 
151
+
152
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
153
 
154
+
155
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
156
 
157
+
158
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
159
 
160
+
161
  8%|▊ | 11/135 [00:05<00:28, 4.32it/s]
162
 
163
+
164
  8%|��� | 11/135 [00:05<00:28, 4.32it/s]
165
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
166
 
167
+
168
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
169
 
170
+
171
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
172
 
173
+
174
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
175
 
176
+
177
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
178
 
179
+
180
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
181
 
182
+
183
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
184
 
185
+
186
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
187
 
188
+
189
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
190
 
191
+
192
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
193
 
194
+
195
  13%|█▎ | 17/135 [00:05<00:12, 9.39it/s]
196
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
197
 
198
+
199
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
200
 
201
+
202
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
203
 
204
+
205
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
206
 
207
+
208
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
209
 
210
+
211
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
212
 
213
+
214
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
215
 
216
+
217
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
218
 
219
+
220
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
221
 
222
+
223
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
224
 
225
+
226
  17%|█▋ | 23/135 [00:05<00:07, 15.03it/s]
227
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
228
 
229
+
230
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
231
 
232
+
233
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
234
 
235
+
236
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
237
 
238
+
239
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
240
 
241
+
242
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
243
 
244
+
245
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
246
 
247
+
248
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
249
 
250
+
251
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
252
 
253
+
254
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
255
 
256
+
257
  21%|██▏ | 29/135 [00:05<00:05, 20.91it/s]
258
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
259
 
260
+
261
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
262
 
263
+
264
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
265
 
266
+
267
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
268
 
269
+
270
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
271
 
272
+
273
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
274
 
275
+
276
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
277
 
278
+
279
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
280
 
281
+
282
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
283
 
284
+
285
  26%|██▌ | 35/135 [00:05<00:03, 26.60it/s]
286
 
287
+
288
  26%|██▌ | 35/135 [00:06<00:03, 26.60it/s]
289
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
290
 
291
+
292
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
293
 
294
+
295
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
296
 
297
+
298
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
299
 
300
+
301
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
302
 
303
+
304
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
305
 
306
+
307
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
308
 
309
+
310
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
311
 
312
+
313
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
314
 
315
+
316
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
317
 
318
+
319
  30%|███ | 41/135 [00:06<00:02, 31.80it/s]
320
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
321
 
322
+
323
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
324
 
325
+
326
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
327
 
328
+
329
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
330
 
331
+
332
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
333
 
334
+
335
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
336
 
337
+
338
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
339
 
340
+
341
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
342
 
343
+
344
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
345
 
346
+
347
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
348
 
349
+
350
  35%|███▍ | 47/135 [00:06<00:02, 36.29it/s]
351
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
352
 
353
+
354
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
355
 
356
+
357
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
358
 
359
+
360
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
361
 
362
+
363
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
364
 
365
+
366
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
367
 
368
+
369
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
370
 
371
+
372
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
373
 
374
+
375
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
376
 
377
+
378
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
379
 
380
+
381
  39%|███▉ | 53/135 [00:06<00:02, 40.11it/s]
382
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
383
 
384
+
385
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
386
 
387
+
388
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
389
 
390
+
391
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
392
 
393
+
394
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
395
 
396
+
397
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
398
 
399
+
400
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
401
 
402
+
403
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
404
 
405
+
406
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
407
 
408
+
409
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
410
 
411
+
412
  44%|████▎ | 59/135 [00:06<00:01, 42.90it/s]
413
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
414
 
415
+
416
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
417
 
418
+
419
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
420
 
421
+
422
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
423
 
424
+
425
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
426
 
427
+
428
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
429
 
430
+
431
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
432
 
433
+
434
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
435
 
436
+
437
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
438
 
439
+
440
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
441
 
442
+
443
  48%|████▊ | 65/135 [00:06<00:01, 45.23it/s]
444
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
445
 
446
+
447
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
448
 
449
+
450
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
451
 
452
+
453
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
454
 
455
+
456
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
457
 
458
+
459
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
460
 
461
+
462
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
463
 
464
+
465
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
466
 
467
+
468
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
469
 
470
+
471
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
472
 
473
+
474
  53%|█████▎ | 71/135 [00:06<00:01, 46.99it/s]
475
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
476
 
477
+
478
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
479
 
480
+
481
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
482
 
483
+
484
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
485
 
486
+
487
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
488
 
489
+
490
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
491
 
492
+
493
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
494
 
495
+
496
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
497
 
498
+
499
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
500
 
501
+
502
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
503
 
504
+
505
  57%|█████▋ | 77/135 [00:06<00:01, 48.15it/s]
506
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
507
 
508
+
509
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
510
 
511
+
512
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
513
 
514
+
515
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
516
 
517
+
518
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
519
 
520
+
521
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
522
 
523
+
524
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
525
 
526
+
527
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
528
 
529
+
530
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
531
 
532
+
533
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
534
 
535
+
536
  61%|██████▏ | 83/135 [00:06<00:01, 48.95it/s]
537
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
538
 
539
+
540
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
541
 
542
+
543
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
544
 
545
+
546
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
547
 
548
+
549
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
550
 
551
+
552
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
553
 
554
+
555
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
556
 
557
+
558
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
559
 
560
+
561
  66%|██████▌ | 89/135 [00:06<00:00, 49.41it/s]
562
 
563
+
564
  66%|██████▌ | 89/135 [00:07<00:00, 49.41it/s]
565
 
566
+
567
  66%|██████▌ | 89/135 [00:07<00:00, 49.41it/s]
568
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
569
 
570
+
571
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
572
 
573
+
574
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
575
 
576
+
577
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
578
 
579
+
580
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
581
 
582
+
583
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
584
 
585
+
586
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
587
 
588
+
589
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
590
 
591
+
592
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
593
 
594
+
595
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
596
 
597
+
598
  70%|███████ | 95/135 [00:07<00:00, 49.77it/s]
599
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
600
 
601
+
602
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
603
 
604
+
605
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
606
 
607
+
608
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
609
 
610
+
611
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
612
 
613
+
614
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
615
 
616
+
617
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
618
 
619
+
620
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
621
 
622
+
623
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
624
 
625
+
626
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
627
 
628
+
629
  75%|███████▍ | 101/135 [00:07<00:00, 50.25it/s]
630
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
631
 
632
+
633
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
634
 
635
+
636
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
637
 
638
+
639
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
640
 
641
+
642
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
643
 
644
+
645
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
646
 
647
+
648
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
649
 
650
+
651
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
652
 
653
+
654
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
655
 
656
+
657
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
658
 
659
+
660
  79%|███████▉ | 107/135 [00:07<00:00, 50.64it/s]
661
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
662
 
663
+
664
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
665
 
666
+
667
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
668
 
669
+
670
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
671
 
672
+
673
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
674
 
675
+
676
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
677
 
678
+
679
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
680
 
681
+
682
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
683
 
684
+
685
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
686
 
687
+
688
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
689
 
690
+
691
  84%|████████▎ | 113/135 [00:07<00:00, 50.97it/s]
692
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
693
 
694
+
695
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
696
 
697
+
698
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
699
 
700
+
701
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
702
 
703
+
704
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
705
 
706
+
707
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
708
 
709
+
710
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
711
 
712
+
713
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
714
 
715
+
716
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
717
 
718
+
719
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
720
 
721
+
722
  88%|████████▊ | 119/135 [00:07<00:00, 51.27it/s]
723
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
724
 
725
+
726
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
727
 
728
+
729
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
730
 
731
+
732
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
733
 
734
+
735
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
736
 
737
+
738
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
739
 
740
+
741
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
742
 
743
+
744
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
745
 
746
+
747
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
748
 
749
+
750
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
751
 
752
+
753
  93%|█████████▎| 125/135 [00:07<00:00, 50.68it/s]
754
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
755
 
756
+
757
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
758
 
759
+
760
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
761
 
762
+
763
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
764
 
765
+
766
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
767
 
768
+
769
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
770
 
771
+
772
  97%|█████████▋| 131/135 [00:07<00:00, 50.69it/s]
773
+ [2024-06-04 20:07:37] INFO huggingface_loader.py:197: Unloading HF weight file: /models/TinyLlama-1.1B-Chat-v1.0/model.safetensors
774
+ [2024-06-04 20:07:38] INFO stats.py:77: Time usage: HF loading: 2.395 sec; Pre-quantization mapping: 0.820 sec; Quantization: 2.709 sec
775
+ [2024-06-04 20:07:38] INFO stats.py:91: RAM usage: Peak RAM: 4.098 GB. Total bytes loaded from disk: 4.098 GB
776
+ [2024-06-04 20:07:38] INFO convert_weight.py:155: Parameter size after quantization: 0.641 GB
777
+ [2024-06-04 20:07:38] INFO convert_weight.py:160: Total parameters: 1,100,048,384
778
+ [2024-06-04 20:07:38] INFO convert_weight.py:161: Bits per parameter: 5.002
779
+ [2024-06-04 20:07:38] INFO convert_weight.py:166: Saved to directory: /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC
780
+
781
+ All finished, 24 total shards committed, record saved to /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC/ndarray-cache.json
782
+ Also saved a bf16 record to /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC/ndarray-cache-b16.json
mlc-chat-config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "llama",
4
+ "quantization": "q4f32_1",
5
+ "model_config": {
6
+ "hidden_size": 2048,
7
+ "intermediate_size": 5632,
8
+ "num_attention_heads": 32,
9
+ "num_hidden_layers": 22,
10
+ "rms_norm_eps": 1e-05,
11
+ "vocab_size": 32000,
12
+ "position_embedding_base": 10000.0,
13
+ "context_window_size": 2048,
14
+ "prefill_chunk_size": 2048,
15
+ "num_key_value_heads": 4,
16
+ "head_dim": 64,
17
+ "tensor_parallel_shards": 1,
18
+ "max_batch_size": 80
19
+ },
20
+ "vocab_size": 32000,
21
+ "context_window_size": 2048,
22
+ "sliding_window_size": -1,
23
+ "prefill_chunk_size": 2048,
24
+ "attention_sink_size": -1,
25
+ "tensor_parallel_shards": 1,
26
+ "temperature": 1.0,
27
+ "presence_penalty": 0.0,
28
+ "frequency_penalty": 0.0,
29
+ "repetition_penalty": 1.0,
30
+ "top_p": 1.0,
31
+ "tokenizer_files": [
32
+ "tokenizer.model",
33
+ "tokenizer.json",
34
+ "tokenizer_config.json"
35
+ ],
36
+ "tokenizer_info": {
37
+ "token_postproc_method": "byte_fallback",
38
+ "prepend_space_in_encode": true,
39
+ "strip_space_in_decode": true
40
+ },
41
+ "conv_template": {
42
+ "name": "chatml",
43
+ "system_template": "<|im_start|>system\n{system_message}",
44
+ "system_message": "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.",
45
+ "system_prefix_token_ids": null,
46
+ "add_role_after_system_message": true,
47
+ "roles": {
48
+ "user": "<|im_start|>user",
49
+ "assistant": "<|im_start|>assistant"
50
+ },
51
+ "role_templates": {
52
+ "user": "{user_message}",
53
+ "assistant": "{assistant_message}",
54
+ "tool": "{tool_message}"
55
+ },
56
+ "messages": [],
57
+ "seps": [
58
+ "<|im_end|>\n"
59
+ ],
60
+ "role_content_sep": "\n",
61
+ "role_empty_sep": "\n",
62
+ "stop_str": [
63
+ "<|im_end|>"
64
+ ],
65
+ "stop_token_ids": [
66
+ 2
67
+ ],
68
+ "function_string": "",
69
+ "use_function_calling": false
70
+ },
71
+ "pad_token_id": 0,
72
+ "bos_token_id": 1,
73
+ "eos_token_id": 2
74
+ }
ndarray-cache-b16.json ADDED
@@ -0,0 +1,2631 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 225,
4
+ "ParamBytes": 687841280.0,
5
+ "BitsPerParam": 5.0022620095953885
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 32768000,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.q_weight",
15
+ "shape": [
16
+ 32000,
17
+ 256
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 32768000,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "6acf42e9e3428cda2d937cedfbc2bdbe"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 32768000,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.q_weight",
34
+ "shape": [
35
+ 32000,
36
+ 256
37
+ ],
38
+ "dtype": "uint32",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 32768000,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "5a4dc43f3da7ea1f953c0c460796a420"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 32976896,
50
+ "records": [
51
+ {
52
+ "name": "lm_head.q_scale",
53
+ "shape": [
54
+ 32000,
55
+ 64
56
+ ],
57
+ "dtype": "bfloat16",
58
+ "format": "raw",
59
+ "nbytes": 4096000,
60
+ "byteOffset": 0
61
+ },
62
+ {
63
+ "name": "model.embed_tokens.q_scale",
64
+ "shape": [
65
+ 32000,
66
+ 64
67
+ ],
68
+ "dtype": "bfloat16",
69
+ "format": "raw",
70
+ "nbytes": 4096000,
71
+ "byteOffset": 4096000
72
+ },
73
+ {
74
+ "name": "model.layers.0.input_layernorm.weight",
75
+ "shape": [
76
+ 2048
77
+ ],
78
+ "dtype": "bfloat16",
79
+ "format": "raw",
80
+ "nbytes": 4096,
81
+ "byteOffset": 8192000
82
+ },
83
+ {
84
+ "name": "model.layers.0.mlp.down_proj.q_weight",
85
+ "shape": [
86
+ 2048,
87
+ 704
88
+ ],
89
+ "dtype": "uint32",
90
+ "format": "f32-to-bf16",
91
+ "nbytes": 5767168,
92
+ "byteOffset": 8196096
93
+ },
94
+ {
95
+ "name": "model.layers.0.mlp.down_proj.q_scale",
96
+ "shape": [
97
+ 2048,
98
+ 176
99
+ ],
100
+ "dtype": "bfloat16",
101
+ "format": "raw",
102
+ "nbytes": 720896,
103
+ "byteOffset": 13963264
104
+ },
105
+ {
106
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
107
+ "shape": [
108
+ 11264,
109
+ 256
110
+ ],
111
+ "dtype": "uint32",
112
+ "format": "f32-to-bf16",
113
+ "nbytes": 11534336,
114
+ "byteOffset": 14684160
115
+ },
116
+ {
117
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
118
+ "shape": [
119
+ 11264,
120
+ 64
121
+ ],
122
+ "dtype": "bfloat16",
123
+ "format": "raw",
124
+ "nbytes": 1441792,
125
+ "byteOffset": 26218496
126
+ },
127
+ {
128
+ "name": "model.layers.0.post_attention_layernorm.weight",
129
+ "shape": [
130
+ 2048
131
+ ],
132
+ "dtype": "bfloat16",
133
+ "format": "raw",
134
+ "nbytes": 4096,
135
+ "byteOffset": 27660288
136
+ },
137
+ {
138
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
139
+ "shape": [
140
+ 2560,
141
+ 256
142
+ ],
143
+ "dtype": "uint32",
144
+ "format": "f32-to-bf16",
145
+ "nbytes": 2621440,
146
+ "byteOffset": 27664384
147
+ },
148
+ {
149
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
150
+ "shape": [
151
+ 2560,
152
+ 64
153
+ ],
154
+ "dtype": "bfloat16",
155
+ "format": "raw",
156
+ "nbytes": 327680,
157
+ "byteOffset": 30285824
158
+ },
159
+ {
160
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
161
+ "shape": [
162
+ 2048,
163
+ 256
164
+ ],
165
+ "dtype": "uint32",
166
+ "format": "f32-to-bf16",
167
+ "nbytes": 2097152,
168
+ "byteOffset": 30613504
169
+ },
170
+ {
171
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
172
+ "shape": [
173
+ 2048,
174
+ 64
175
+ ],
176
+ "dtype": "bfloat16",
177
+ "format": "raw",
178
+ "nbytes": 262144,
179
+ "byteOffset": 32710656
180
+ },
181
+ {
182
+ "name": "model.layers.1.input_layernorm.weight",
183
+ "shape": [
184
+ 2048
185
+ ],
186
+ "dtype": "bfloat16",
187
+ "format": "raw",
188
+ "nbytes": 4096,
189
+ "byteOffset": 32972800
190
+ }
191
+ ],
192
+ "md5sum": "b8052fae94b2f9484c7fed0f270034b6"
193
+ },
194
+ {
195
+ "dataPath": "params_shard_3.bin",
196
+ "format": "raw-shard",
197
+ "nbytes": 31268864,
198
+ "records": [
199
+ {
200
+ "name": "model.layers.1.mlp.down_proj.q_weight",
201
+ "shape": [
202
+ 2048,
203
+ 704
204
+ ],
205
+ "dtype": "uint32",
206
+ "format": "f32-to-bf16",
207
+ "nbytes": 5767168,
208
+ "byteOffset": 0
209
+ },
210
+ {
211
+ "name": "model.layers.1.mlp.down_proj.q_scale",
212
+ "shape": [
213
+ 2048,
214
+ 176
215
+ ],
216
+ "dtype": "bfloat16",
217
+ "format": "raw",
218
+ "nbytes": 720896,
219
+ "byteOffset": 5767168
220
+ },
221
+ {
222
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
223
+ "shape": [
224
+ 11264,
225
+ 256
226
+ ],
227
+ "dtype": "uint32",
228
+ "format": "f32-to-bf16",
229
+ "nbytes": 11534336,
230
+ "byteOffset": 6488064
231
+ },
232
+ {
233
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
234
+ "shape": [
235
+ 11264,
236
+ 64
237
+ ],
238
+ "dtype": "bfloat16",
239
+ "format": "raw",
240
+ "nbytes": 1441792,
241
+ "byteOffset": 18022400
242
+ },
243
+ {
244
+ "name": "model.layers.1.post_attention_layernorm.weight",
245
+ "shape": [
246
+ 2048
247
+ ],
248
+ "dtype": "bfloat16",
249
+ "format": "raw",
250
+ "nbytes": 4096,
251
+ "byteOffset": 19464192
252
+ },
253
+ {
254
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
255
+ "shape": [
256
+ 2560,
257
+ 256
258
+ ],
259
+ "dtype": "uint32",
260
+ "format": "f32-to-bf16",
261
+ "nbytes": 2621440,
262
+ "byteOffset": 19468288
263
+ },
264
+ {
265
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
266
+ "shape": [
267
+ 2560,
268
+ 64
269
+ ],
270
+ "dtype": "bfloat16",
271
+ "format": "raw",
272
+ "nbytes": 327680,
273
+ "byteOffset": 22089728
274
+ },
275
+ {
276
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
277
+ "shape": [
278
+ 2048,
279
+ 256
280
+ ],
281
+ "dtype": "uint32",
282
+ "format": "f32-to-bf16",
283
+ "nbytes": 2097152,
284
+ "byteOffset": 22417408
285
+ },
286
+ {
287
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
288
+ "shape": [
289
+ 2048,
290
+ 64
291
+ ],
292
+ "dtype": "bfloat16",
293
+ "format": "raw",
294
+ "nbytes": 262144,
295
+ "byteOffset": 24514560
296
+ },
297
+ {
298
+ "name": "model.layers.10.input_layernorm.weight",
299
+ "shape": [
300
+ 2048
301
+ ],
302
+ "dtype": "bfloat16",
303
+ "format": "raw",
304
+ "nbytes": 4096,
305
+ "byteOffset": 24776704
306
+ },
307
+ {
308
+ "name": "model.layers.10.mlp.down_proj.q_weight",
309
+ "shape": [
310
+ 2048,
311
+ 704
312
+ ],
313
+ "dtype": "uint32",
314
+ "format": "f32-to-bf16",
315
+ "nbytes": 5767168,
316
+ "byteOffset": 24780800
317
+ },
318
+ {
319
+ "name": "model.layers.10.mlp.down_proj.q_scale",
320
+ "shape": [
321
+ 2048,
322
+ 176
323
+ ],
324
+ "dtype": "bfloat16",
325
+ "format": "raw",
326
+ "nbytes": 720896,
327
+ "byteOffset": 30547968
328
+ }
329
+ ],
330
+ "md5sum": "236ea086fe698d15895a284f828cd334"
331
+ },
332
+ {
333
+ "dataPath": "params_shard_4.bin",
334
+ "format": "raw-shard",
335
+ "nbytes": 24780800,
336
+ "records": [
337
+ {
338
+ "name": "model.layers.10.mlp.gate_up_proj.q_weight",
339
+ "shape": [
340
+ 11264,
341
+ 256
342
+ ],
343
+ "dtype": "uint32",
344
+ "format": "f32-to-bf16",
345
+ "nbytes": 11534336,
346
+ "byteOffset": 0
347
+ },
348
+ {
349
+ "name": "model.layers.10.mlp.gate_up_proj.q_scale",
350
+ "shape": [
351
+ 11264,
352
+ 64
353
+ ],
354
+ "dtype": "bfloat16",
355
+ "format": "raw",
356
+ "nbytes": 1441792,
357
+ "byteOffset": 11534336
358
+ },
359
+ {
360
+ "name": "model.layers.10.post_attention_layernorm.weight",
361
+ "shape": [
362
+ 2048
363
+ ],
364
+ "dtype": "bfloat16",
365
+ "format": "raw",
366
+ "nbytes": 4096,
367
+ "byteOffset": 12976128
368
+ },
369
+ {
370
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
371
+ "shape": [
372
+ 2560,
373
+ 256
374
+ ],
375
+ "dtype": "uint32",
376
+ "format": "f32-to-bf16",
377
+ "nbytes": 2621440,
378
+ "byteOffset": 12980224
379
+ },
380
+ {
381
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
382
+ "shape": [
383
+ 2560,
384
+ 64
385
+ ],
386
+ "dtype": "bfloat16",
387
+ "format": "raw",
388
+ "nbytes": 327680,
389
+ "byteOffset": 15601664
390
+ },
391
+ {
392
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
393
+ "shape": [
394
+ 2048,
395
+ 256
396
+ ],
397
+ "dtype": "uint32",
398
+ "format": "f32-to-bf16",
399
+ "nbytes": 2097152,
400
+ "byteOffset": 15929344
401
+ },
402
+ {
403
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
404
+ "shape": [
405
+ 2048,
406
+ 64
407
+ ],
408
+ "dtype": "bfloat16",
409
+ "format": "raw",
410
+ "nbytes": 262144,
411
+ "byteOffset": 18026496
412
+ },
413
+ {
414
+ "name": "model.layers.11.input_layernorm.weight",
415
+ "shape": [
416
+ 2048
417
+ ],
418
+ "dtype": "bfloat16",
419
+ "format": "raw",
420
+ "nbytes": 4096,
421
+ "byteOffset": 18288640
422
+ },
423
+ {
424
+ "name": "model.layers.11.mlp.down_proj.q_weight",
425
+ "shape": [
426
+ 2048,
427
+ 704
428
+ ],
429
+ "dtype": "uint32",
430
+ "format": "f32-to-bf16",
431
+ "nbytes": 5767168,
432
+ "byteOffset": 18292736
433
+ },
434
+ {
435
+ "name": "model.layers.11.mlp.down_proj.q_scale",
436
+ "shape": [
437
+ 2048,
438
+ 176
439
+ ],
440
+ "dtype": "bfloat16",
441
+ "format": "raw",
442
+ "nbytes": 720896,
443
+ "byteOffset": 24059904
444
+ }
445
+ ],
446
+ "md5sum": "9f801a8ec12c49b630b273561a4d9554"
447
+ },
448
+ {
449
+ "dataPath": "params_shard_5.bin",
450
+ "format": "raw-shard",
451
+ "nbytes": 24780800,
452
+ "records": [
453
+ {
454
+ "name": "model.layers.11.mlp.gate_up_proj.q_weight",
455
+ "shape": [
456
+ 11264,
457
+ 256
458
+ ],
459
+ "dtype": "uint32",
460
+ "format": "f32-to-bf16",
461
+ "nbytes": 11534336,
462
+ "byteOffset": 0
463
+ },
464
+ {
465
+ "name": "model.layers.11.mlp.gate_up_proj.q_scale",
466
+ "shape": [
467
+ 11264,
468
+ 64
469
+ ],
470
+ "dtype": "bfloat16",
471
+ "format": "raw",
472
+ "nbytes": 1441792,
473
+ "byteOffset": 11534336
474
+ },
475
+ {
476
+ "name": "model.layers.11.post_attention_layernorm.weight",
477
+ "shape": [
478
+ 2048
479
+ ],
480
+ "dtype": "bfloat16",
481
+ "format": "raw",
482
+ "nbytes": 4096,
483
+ "byteOffset": 12976128
484
+ },
485
+ {
486
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
487
+ "shape": [
488
+ 2560,
489
+ 256
490
+ ],
491
+ "dtype": "uint32",
492
+ "format": "f32-to-bf16",
493
+ "nbytes": 2621440,
494
+ "byteOffset": 12980224
495
+ },
496
+ {
497
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
498
+ "shape": [
499
+ 2560,
500
+ 64
501
+ ],
502
+ "dtype": "bfloat16",
503
+ "format": "raw",
504
+ "nbytes": 327680,
505
+ "byteOffset": 15601664
506
+ },
507
+ {
508
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
509
+ "shape": [
510
+ 2048,
511
+ 256
512
+ ],
513
+ "dtype": "uint32",
514
+ "format": "f32-to-bf16",
515
+ "nbytes": 2097152,
516
+ "byteOffset": 15929344
517
+ },
518
+ {
519
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
520
+ "shape": [
521
+ 2048,
522
+ 64
523
+ ],
524
+ "dtype": "bfloat16",
525
+ "format": "raw",
526
+ "nbytes": 262144,
527
+ "byteOffset": 18026496
528
+ },
529
+ {
530
+ "name": "model.layers.12.input_layernorm.weight",
531
+ "shape": [
532
+ 2048
533
+ ],
534
+ "dtype": "bfloat16",
535
+ "format": "raw",
536
+ "nbytes": 4096,
537
+ "byteOffset": 18288640
538
+ },
539
+ {
540
+ "name": "model.layers.12.mlp.down_proj.q_weight",
541
+ "shape": [
542
+ 2048,
543
+ 704
544
+ ],
545
+ "dtype": "uint32",
546
+ "format": "f32-to-bf16",
547
+ "nbytes": 5767168,
548
+ "byteOffset": 18292736
549
+ },
550
+ {
551
+ "name": "model.layers.12.mlp.down_proj.q_scale",
552
+ "shape": [
553
+ 2048,
554
+ 176
555
+ ],
556
+ "dtype": "bfloat16",
557
+ "format": "raw",
558
+ "nbytes": 720896,
559
+ "byteOffset": 24059904
560
+ }
561
+ ],
562
+ "md5sum": "4a90502242460bc5d815ab87dcc2c3fc"
563
+ },
564
+ {
565
+ "dataPath": "params_shard_6.bin",
566
+ "format": "raw-shard",
567
+ "nbytes": 24780800,
568
+ "records": [
569
+ {
570
+ "name": "model.layers.12.mlp.gate_up_proj.q_weight",
571
+ "shape": [
572
+ 11264,
573
+ 256
574
+ ],
575
+ "dtype": "uint32",
576
+ "format": "f32-to-bf16",
577
+ "nbytes": 11534336,
578
+ "byteOffset": 0
579
+ },
580
+ {
581
+ "name": "model.layers.12.mlp.gate_up_proj.q_scale",
582
+ "shape": [
583
+ 11264,
584
+ 64
585
+ ],
586
+ "dtype": "bfloat16",
587
+ "format": "raw",
588
+ "nbytes": 1441792,
589
+ "byteOffset": 11534336
590
+ },
591
+ {
592
+ "name": "model.layers.12.post_attention_layernorm.weight",
593
+ "shape": [
594
+ 2048
595
+ ],
596
+ "dtype": "bfloat16",
597
+ "format": "raw",
598
+ "nbytes": 4096,
599
+ "byteOffset": 12976128
600
+ },
601
+ {
602
+ "name": "model.layers.12.self_attn.qkv_proj.q_weight",
603
+ "shape": [
604
+ 2560,
605
+ 256
606
+ ],
607
+ "dtype": "uint32",
608
+ "format": "f32-to-bf16",
609
+ "nbytes": 2621440,
610
+ "byteOffset": 12980224
611
+ },
612
+ {
613
+ "name": "model.layers.12.self_attn.qkv_proj.q_scale",
614
+ "shape": [
615
+ 2560,
616
+ 64
617
+ ],
618
+ "dtype": "bfloat16",
619
+ "format": "raw",
620
+ "nbytes": 327680,
621
+ "byteOffset": 15601664
622
+ },
623
+ {
624
+ "name": "model.layers.12.self_attn.o_proj.q_weight",
625
+ "shape": [
626
+ 2048,
627
+ 256
628
+ ],
629
+ "dtype": "uint32",
630
+ "format": "f32-to-bf16",
631
+ "nbytes": 2097152,
632
+ "byteOffset": 15929344
633
+ },
634
+ {
635
+ "name": "model.layers.12.self_attn.o_proj.q_scale",
636
+ "shape": [
637
+ 2048,
638
+ 64
639
+ ],
640
+ "dtype": "bfloat16",
641
+ "format": "raw",
642
+ "nbytes": 262144,
643
+ "byteOffset": 18026496
644
+ },
645
+ {
646
+ "name": "model.layers.13.input_layernorm.weight",
647
+ "shape": [
648
+ 2048
649
+ ],
650
+ "dtype": "bfloat16",
651
+ "format": "raw",
652
+ "nbytes": 4096,
653
+ "byteOffset": 18288640
654
+ },
655
+ {
656
+ "name": "model.layers.13.mlp.down_proj.q_weight",
657
+ "shape": [
658
+ 2048,
659
+ 704
660
+ ],
661
+ "dtype": "uint32",
662
+ "format": "f32-to-bf16",
663
+ "nbytes": 5767168,
664
+ "byteOffset": 18292736
665
+ },
666
+ {
667
+ "name": "model.layers.13.mlp.down_proj.q_scale",
668
+ "shape": [
669
+ 2048,
670
+ 176
671
+ ],
672
+ "dtype": "bfloat16",
673
+ "format": "raw",
674
+ "nbytes": 720896,
675
+ "byteOffset": 24059904
676
+ }
677
+ ],
678
+ "md5sum": "4bc1e064d362ffbf9bae57c15e735e8d"
679
+ },
680
+ {
681
+ "dataPath": "params_shard_7.bin",
682
+ "format": "raw-shard",
683
+ "nbytes": 24780800,
684
+ "records": [
685
+ {
686
+ "name": "model.layers.13.mlp.gate_up_proj.q_weight",
687
+ "shape": [
688
+ 11264,
689
+ 256
690
+ ],
691
+ "dtype": "uint32",
692
+ "format": "f32-to-bf16",
693
+ "nbytes": 11534336,
694
+ "byteOffset": 0
695
+ },
696
+ {
697
+ "name": "model.layers.13.mlp.gate_up_proj.q_scale",
698
+ "shape": [
699
+ 11264,
700
+ 64
701
+ ],
702
+ "dtype": "bfloat16",
703
+ "format": "raw",
704
+ "nbytes": 1441792,
705
+ "byteOffset": 11534336
706
+ },
707
+ {
708
+ "name": "model.layers.13.post_attention_layernorm.weight",
709
+ "shape": [
710
+ 2048
711
+ ],
712
+ "dtype": "bfloat16",
713
+ "format": "raw",
714
+ "nbytes": 4096,
715
+ "byteOffset": 12976128
716
+ },
717
+ {
718
+ "name": "model.layers.13.self_attn.qkv_proj.q_weight",
719
+ "shape": [
720
+ 2560,
721
+ 256
722
+ ],
723
+ "dtype": "uint32",
724
+ "format": "f32-to-bf16",
725
+ "nbytes": 2621440,
726
+ "byteOffset": 12980224
727
+ },
728
+ {
729
+ "name": "model.layers.13.self_attn.qkv_proj.q_scale",
730
+ "shape": [
731
+ 2560,
732
+ 64
733
+ ],
734
+ "dtype": "bfloat16",
735
+ "format": "raw",
736
+ "nbytes": 327680,
737
+ "byteOffset": 15601664
738
+ },
739
+ {
740
+ "name": "model.layers.13.self_attn.o_proj.q_weight",
741
+ "shape": [
742
+ 2048,
743
+ 256
744
+ ],
745
+ "dtype": "uint32",
746
+ "format": "f32-to-bf16",
747
+ "nbytes": 2097152,
748
+ "byteOffset": 15929344
749
+ },
750
+ {
751
+ "name": "model.layers.13.self_attn.o_proj.q_scale",
752
+ "shape": [
753
+ 2048,
754
+ 64
755
+ ],
756
+ "dtype": "bfloat16",
757
+ "format": "raw",
758
+ "nbytes": 262144,
759
+ "byteOffset": 18026496
760
+ },
761
+ {
762
+ "name": "model.layers.14.input_layernorm.weight",
763
+ "shape": [
764
+ 2048
765
+ ],
766
+ "dtype": "bfloat16",
767
+ "format": "raw",
768
+ "nbytes": 4096,
769
+ "byteOffset": 18288640
770
+ },
771
+ {
772
+ "name": "model.layers.14.mlp.down_proj.q_weight",
773
+ "shape": [
774
+ 2048,
775
+ 704
776
+ ],
777
+ "dtype": "uint32",
778
+ "format": "f32-to-bf16",
779
+ "nbytes": 5767168,
780
+ "byteOffset": 18292736
781
+ },
782
+ {
783
+ "name": "model.layers.14.mlp.down_proj.q_scale",
784
+ "shape": [
785
+ 2048,
786
+ 176
787
+ ],
788
+ "dtype": "bfloat16",
789
+ "format": "raw",
790
+ "nbytes": 720896,
791
+ "byteOffset": 24059904
792
+ }
793
+ ],
794
+ "md5sum": "85a8f0c3b9857df7a6fa1b9cbbb5d927"
795
+ },
796
+ {
797
+ "dataPath": "params_shard_8.bin",
798
+ "format": "raw-shard",
799
+ "nbytes": 24780800,
800
+ "records": [
801
+ {
802
+ "name": "model.layers.14.mlp.gate_up_proj.q_weight",
803
+ "shape": [
804
+ 11264,
805
+ 256
806
+ ],
807
+ "dtype": "uint32",
808
+ "format": "f32-to-bf16",
809
+ "nbytes": 11534336,
810
+ "byteOffset": 0
811
+ },
812
+ {
813
+ "name": "model.layers.14.mlp.gate_up_proj.q_scale",
814
+ "shape": [
815
+ 11264,
816
+ 64
817
+ ],
818
+ "dtype": "bfloat16",
819
+ "format": "raw",
820
+ "nbytes": 1441792,
821
+ "byteOffset": 11534336
822
+ },
823
+ {
824
+ "name": "model.layers.14.post_attention_layernorm.weight",
825
+ "shape": [
826
+ 2048
827
+ ],
828
+ "dtype": "bfloat16",
829
+ "format": "raw",
830
+ "nbytes": 4096,
831
+ "byteOffset": 12976128
832
+ },
833
+ {
834
+ "name": "model.layers.14.self_attn.qkv_proj.q_weight",
835
+ "shape": [
836
+ 2560,
837
+ 256
838
+ ],
839
+ "dtype": "uint32",
840
+ "format": "f32-to-bf16",
841
+ "nbytes": 2621440,
842
+ "byteOffset": 12980224
843
+ },
844
+ {
845
+ "name": "model.layers.14.self_attn.qkv_proj.q_scale",
846
+ "shape": [
847
+ 2560,
848
+ 64
849
+ ],
850
+ "dtype": "bfloat16",
851
+ "format": "raw",
852
+ "nbytes": 327680,
853
+ "byteOffset": 15601664
854
+ },
855
+ {
856
+ "name": "model.layers.14.self_attn.o_proj.q_weight",
857
+ "shape": [
858
+ 2048,
859
+ 256
860
+ ],
861
+ "dtype": "uint32",
862
+ "format": "f32-to-bf16",
863
+ "nbytes": 2097152,
864
+ "byteOffset": 15929344
865
+ },
866
+ {
867
+ "name": "model.layers.14.self_attn.o_proj.q_scale",
868
+ "shape": [
869
+ 2048,
870
+ 64
871
+ ],
872
+ "dtype": "bfloat16",
873
+ "format": "raw",
874
+ "nbytes": 262144,
875
+ "byteOffset": 18026496
876
+ },
877
+ {
878
+ "name": "model.layers.15.input_layernorm.weight",
879
+ "shape": [
880
+ 2048
881
+ ],
882
+ "dtype": "bfloat16",
883
+ "format": "raw",
884
+ "nbytes": 4096,
885
+ "byteOffset": 18288640
886
+ },
887
+ {
888
+ "name": "model.layers.15.mlp.down_proj.q_weight",
889
+ "shape": [
890
+ 2048,
891
+ 704
892
+ ],
893
+ "dtype": "uint32",
894
+ "format": "f32-to-bf16",
895
+ "nbytes": 5767168,
896
+ "byteOffset": 18292736
897
+ },
898
+ {
899
+ "name": "model.layers.15.mlp.down_proj.q_scale",
900
+ "shape": [
901
+ 2048,
902
+ 176
903
+ ],
904
+ "dtype": "bfloat16",
905
+ "format": "raw",
906
+ "nbytes": 720896,
907
+ "byteOffset": 24059904
908
+ }
909
+ ],
910
+ "md5sum": "18e9c491a88f7adc43ed17d6f940f080"
911
+ },
912
+ {
913
+ "dataPath": "params_shard_9.bin",
914
+ "format": "raw-shard",
915
+ "nbytes": 24780800,
916
+ "records": [
917
+ {
918
+ "name": "model.layers.15.mlp.gate_up_proj.q_weight",
919
+ "shape": [
920
+ 11264,
921
+ 256
922
+ ],
923
+ "dtype": "uint32",
924
+ "format": "f32-to-bf16",
925
+ "nbytes": 11534336,
926
+ "byteOffset": 0
927
+ },
928
+ {
929
+ "name": "model.layers.15.mlp.gate_up_proj.q_scale",
930
+ "shape": [
931
+ 11264,
932
+ 64
933
+ ],
934
+ "dtype": "bfloat16",
935
+ "format": "raw",
936
+ "nbytes": 1441792,
937
+ "byteOffset": 11534336
938
+ },
939
+ {
940
+ "name": "model.layers.15.post_attention_layernorm.weight",
941
+ "shape": [
942
+ 2048
943
+ ],
944
+ "dtype": "bfloat16",
945
+ "format": "raw",
946
+ "nbytes": 4096,
947
+ "byteOffset": 12976128
948
+ },
949
+ {
950
+ "name": "model.layers.15.self_attn.qkv_proj.q_weight",
951
+ "shape": [
952
+ 2560,
953
+ 256
954
+ ],
955
+ "dtype": "uint32",
956
+ "format": "f32-to-bf16",
957
+ "nbytes": 2621440,
958
+ "byteOffset": 12980224
959
+ },
960
+ {
961
+ "name": "model.layers.15.self_attn.qkv_proj.q_scale",
962
+ "shape": [
963
+ 2560,
964
+ 64
965
+ ],
966
+ "dtype": "bfloat16",
967
+ "format": "raw",
968
+ "nbytes": 327680,
969
+ "byteOffset": 15601664
970
+ },
971
+ {
972
+ "name": "model.layers.15.self_attn.o_proj.q_weight",
973
+ "shape": [
974
+ 2048,
975
+ 256
976
+ ],
977
+ "dtype": "uint32",
978
+ "format": "f32-to-bf16",
979
+ "nbytes": 2097152,
980
+ "byteOffset": 15929344
981
+ },
982
+ {
983
+ "name": "model.layers.15.self_attn.o_proj.q_scale",
984
+ "shape": [
985
+ 2048,
986
+ 64
987
+ ],
988
+ "dtype": "bfloat16",
989
+ "format": "raw",
990
+ "nbytes": 262144,
991
+ "byteOffset": 18026496
992
+ },
993
+ {
994
+ "name": "model.layers.16.input_layernorm.weight",
995
+ "shape": [
996
+ 2048
997
+ ],
998
+ "dtype": "bfloat16",
999
+ "format": "raw",
1000
+ "nbytes": 4096,
1001
+ "byteOffset": 18288640
1002
+ },
1003
+ {
1004
+ "name": "model.layers.16.mlp.down_proj.q_weight",
1005
+ "shape": [
1006
+ 2048,
1007
+ 704
1008
+ ],
1009
+ "dtype": "uint32",
1010
+ "format": "f32-to-bf16",
1011
+ "nbytes": 5767168,
1012
+ "byteOffset": 18292736
1013
+ },
1014
+ {
1015
+ "name": "model.layers.16.mlp.down_proj.q_scale",
1016
+ "shape": [
1017
+ 2048,
1018
+ 176
1019
+ ],
1020
+ "dtype": "bfloat16",
1021
+ "format": "raw",
1022
+ "nbytes": 720896,
1023
+ "byteOffset": 24059904
1024
+ }
1025
+ ],
1026
+ "md5sum": "ebe88a618b7cb4980864878efb2bdbb7"
1027
+ },
1028
+ {
1029
+ "dataPath": "params_shard_10.bin",
1030
+ "format": "raw-shard",
1031
+ "nbytes": 24780800,
1032
+ "records": [
1033
+ {
1034
+ "name": "model.layers.16.mlp.gate_up_proj.q_weight",
1035
+ "shape": [
1036
+ 11264,
1037
+ 256
1038
+ ],
1039
+ "dtype": "uint32",
1040
+ "format": "f32-to-bf16",
1041
+ "nbytes": 11534336,
1042
+ "byteOffset": 0
1043
+ },
1044
+ {
1045
+ "name": "model.layers.16.mlp.gate_up_proj.q_scale",
1046
+ "shape": [
1047
+ 11264,
1048
+ 64
1049
+ ],
1050
+ "dtype": "bfloat16",
1051
+ "format": "raw",
1052
+ "nbytes": 1441792,
1053
+ "byteOffset": 11534336
1054
+ },
1055
+ {
1056
+ "name": "model.layers.16.post_attention_layernorm.weight",
1057
+ "shape": [
1058
+ 2048
1059
+ ],
1060
+ "dtype": "bfloat16",
1061
+ "format": "raw",
1062
+ "nbytes": 4096,
1063
+ "byteOffset": 12976128
1064
+ },
1065
+ {
1066
+ "name": "model.layers.16.self_attn.qkv_proj.q_weight",
1067
+ "shape": [
1068
+ 2560,
1069
+ 256
1070
+ ],
1071
+ "dtype": "uint32",
1072
+ "format": "f32-to-bf16",
1073
+ "nbytes": 2621440,
1074
+ "byteOffset": 12980224
1075
+ },
1076
+ {
1077
+ "name": "model.layers.16.self_attn.qkv_proj.q_scale",
1078
+ "shape": [
1079
+ 2560,
1080
+ 64
1081
+ ],
1082
+ "dtype": "bfloat16",
1083
+ "format": "raw",
1084
+ "nbytes": 327680,
1085
+ "byteOffset": 15601664
1086
+ },
1087
+ {
1088
+ "name": "model.layers.16.self_attn.o_proj.q_weight",
1089
+ "shape": [
1090
+ 2048,
1091
+ 256
1092
+ ],
1093
+ "dtype": "uint32",
1094
+ "format": "f32-to-bf16",
1095
+ "nbytes": 2097152,
1096
+ "byteOffset": 15929344
1097
+ },
1098
+ {
1099
+ "name": "model.layers.16.self_attn.o_proj.q_scale",
1100
+ "shape": [
1101
+ 2048,
1102
+ 64
1103
+ ],
1104
+ "dtype": "bfloat16",
1105
+ "format": "raw",
1106
+ "nbytes": 262144,
1107
+ "byteOffset": 18026496
1108
+ },
1109
+ {
1110
+ "name": "model.layers.17.input_layernorm.weight",
1111
+ "shape": [
1112
+ 2048
1113
+ ],
1114
+ "dtype": "bfloat16",
1115
+ "format": "raw",
1116
+ "nbytes": 4096,
1117
+ "byteOffset": 18288640
1118
+ },
1119
+ {
1120
+ "name": "model.layers.17.mlp.down_proj.q_weight",
1121
+ "shape": [
1122
+ 2048,
1123
+ 704
1124
+ ],
1125
+ "dtype": "uint32",
1126
+ "format": "f32-to-bf16",
1127
+ "nbytes": 5767168,
1128
+ "byteOffset": 18292736
1129
+ },
1130
+ {
1131
+ "name": "model.layers.17.mlp.down_proj.q_scale",
1132
+ "shape": [
1133
+ 2048,
1134
+ 176
1135
+ ],
1136
+ "dtype": "bfloat16",
1137
+ "format": "raw",
1138
+ "nbytes": 720896,
1139
+ "byteOffset": 24059904
1140
+ }
1141
+ ],
1142
+ "md5sum": "2a298c0af58f1e1a208b199e40b49972"
1143
+ },
1144
+ {
1145
+ "dataPath": "params_shard_11.bin",
1146
+ "format": "raw-shard",
1147
+ "nbytes": 24780800,
1148
+ "records": [
1149
+ {
1150
+ "name": "model.layers.17.mlp.gate_up_proj.q_weight",
1151
+ "shape": [
1152
+ 11264,
1153
+ 256
1154
+ ],
1155
+ "dtype": "uint32",
1156
+ "format": "f32-to-bf16",
1157
+ "nbytes": 11534336,
1158
+ "byteOffset": 0
1159
+ },
1160
+ {
1161
+ "name": "model.layers.17.mlp.gate_up_proj.q_scale",
1162
+ "shape": [
1163
+ 11264,
1164
+ 64
1165
+ ],
1166
+ "dtype": "bfloat16",
1167
+ "format": "raw",
1168
+ "nbytes": 1441792,
1169
+ "byteOffset": 11534336
1170
+ },
1171
+ {
1172
+ "name": "model.layers.17.post_attention_layernorm.weight",
1173
+ "shape": [
1174
+ 2048
1175
+ ],
1176
+ "dtype": "bfloat16",
1177
+ "format": "raw",
1178
+ "nbytes": 4096,
1179
+ "byteOffset": 12976128
1180
+ },
1181
+ {
1182
+ "name": "model.layers.17.self_attn.qkv_proj.q_weight",
1183
+ "shape": [
1184
+ 2560,
1185
+ 256
1186
+ ],
1187
+ "dtype": "uint32",
1188
+ "format": "f32-to-bf16",
1189
+ "nbytes": 2621440,
1190
+ "byteOffset": 12980224
1191
+ },
1192
+ {
1193
+ "name": "model.layers.17.self_attn.qkv_proj.q_scale",
1194
+ "shape": [
1195
+ 2560,
1196
+ 64
1197
+ ],
1198
+ "dtype": "bfloat16",
1199
+ "format": "raw",
1200
+ "nbytes": 327680,
1201
+ "byteOffset": 15601664
1202
+ },
1203
+ {
1204
+ "name": "model.layers.17.self_attn.o_proj.q_weight",
1205
+ "shape": [
1206
+ 2048,
1207
+ 256
1208
+ ],
1209
+ "dtype": "uint32",
1210
+ "format": "f32-to-bf16",
1211
+ "nbytes": 2097152,
1212
+ "byteOffset": 15929344
1213
+ },
1214
+ {
1215
+ "name": "model.layers.17.self_attn.o_proj.q_scale",
1216
+ "shape": [
1217
+ 2048,
1218
+ 64
1219
+ ],
1220
+ "dtype": "bfloat16",
1221
+ "format": "raw",
1222
+ "nbytes": 262144,
1223
+ "byteOffset": 18026496
1224
+ },
1225
+ {
1226
+ "name": "model.layers.18.input_layernorm.weight",
1227
+ "shape": [
1228
+ 2048
1229
+ ],
1230
+ "dtype": "bfloat16",
1231
+ "format": "raw",
1232
+ "nbytes": 4096,
1233
+ "byteOffset": 18288640
1234
+ },
1235
+ {
1236
+ "name": "model.layers.18.mlp.down_proj.q_weight",
1237
+ "shape": [
1238
+ 2048,
1239
+ 704
1240
+ ],
1241
+ "dtype": "uint32",
1242
+ "format": "f32-to-bf16",
1243
+ "nbytes": 5767168,
1244
+ "byteOffset": 18292736
1245
+ },
1246
+ {
1247
+ "name": "model.layers.18.mlp.down_proj.q_scale",
1248
+ "shape": [
1249
+ 2048,
1250
+ 176
1251
+ ],
1252
+ "dtype": "bfloat16",
1253
+ "format": "raw",
1254
+ "nbytes": 720896,
1255
+ "byteOffset": 24059904
1256
+ }
1257
+ ],
1258
+ "md5sum": "8a6ef364911a1049b2c0531be0fc15b6"
1259
+ },
1260
+ {
1261
+ "dataPath": "params_shard_12.bin",
1262
+ "format": "raw-shard",
1263
+ "nbytes": 24780800,
1264
+ "records": [
1265
+ {
1266
+ "name": "model.layers.18.mlp.gate_up_proj.q_weight",
1267
+ "shape": [
1268
+ 11264,
1269
+ 256
1270
+ ],
1271
+ "dtype": "uint32",
1272
+ "format": "f32-to-bf16",
1273
+ "nbytes": 11534336,
1274
+ "byteOffset": 0
1275
+ },
1276
+ {
1277
+ "name": "model.layers.18.mlp.gate_up_proj.q_scale",
1278
+ "shape": [
1279
+ 11264,
1280
+ 64
1281
+ ],
1282
+ "dtype": "bfloat16",
1283
+ "format": "raw",
1284
+ "nbytes": 1441792,
1285
+ "byteOffset": 11534336
1286
+ },
1287
+ {
1288
+ "name": "model.layers.18.post_attention_layernorm.weight",
1289
+ "shape": [
1290
+ 2048
1291
+ ],
1292
+ "dtype": "bfloat16",
1293
+ "format": "raw",
1294
+ "nbytes": 4096,
1295
+ "byteOffset": 12976128
1296
+ },
1297
+ {
1298
+ "name": "model.layers.18.self_attn.qkv_proj.q_weight",
1299
+ "shape": [
1300
+ 2560,
1301
+ 256
1302
+ ],
1303
+ "dtype": "uint32",
1304
+ "format": "f32-to-bf16",
1305
+ "nbytes": 2621440,
1306
+ "byteOffset": 12980224
1307
+ },
1308
+ {
1309
+ "name": "model.layers.18.self_attn.qkv_proj.q_scale",
1310
+ "shape": [
1311
+ 2560,
1312
+ 64
1313
+ ],
1314
+ "dtype": "bfloat16",
1315
+ "format": "raw",
1316
+ "nbytes": 327680,
1317
+ "byteOffset": 15601664
1318
+ },
1319
+ {
1320
+ "name": "model.layers.18.self_attn.o_proj.q_weight",
1321
+ "shape": [
1322
+ 2048,
1323
+ 256
1324
+ ],
1325
+ "dtype": "uint32",
1326
+ "format": "f32-to-bf16",
1327
+ "nbytes": 2097152,
1328
+ "byteOffset": 15929344
1329
+ },
1330
+ {
1331
+ "name": "model.layers.18.self_attn.o_proj.q_scale",
1332
+ "shape": [
1333
+ 2048,
1334
+ 64
1335
+ ],
1336
+ "dtype": "bfloat16",
1337
+ "format": "raw",
1338
+ "nbytes": 262144,
1339
+ "byteOffset": 18026496
1340
+ },
1341
+ {
1342
+ "name": "model.layers.19.input_layernorm.weight",
1343
+ "shape": [
1344
+ 2048
1345
+ ],
1346
+ "dtype": "bfloat16",
1347
+ "format": "raw",
1348
+ "nbytes": 4096,
1349
+ "byteOffset": 18288640
1350
+ },
1351
+ {
1352
+ "name": "model.layers.19.mlp.down_proj.q_weight",
1353
+ "shape": [
1354
+ 2048,
1355
+ 704
1356
+ ],
1357
+ "dtype": "uint32",
1358
+ "format": "f32-to-bf16",
1359
+ "nbytes": 5767168,
1360
+ "byteOffset": 18292736
1361
+ },
1362
+ {
1363
+ "name": "model.layers.19.mlp.down_proj.q_scale",
1364
+ "shape": [
1365
+ 2048,
1366
+ 176
1367
+ ],
1368
+ "dtype": "bfloat16",
1369
+ "format": "raw",
1370
+ "nbytes": 720896,
1371
+ "byteOffset": 24059904
1372
+ }
1373
+ ],
1374
+ "md5sum": "13ffdc8591e96c1c5f27090d70144bd0"
1375
+ },
1376
+ {
1377
+ "dataPath": "params_shard_13.bin",
1378
+ "format": "raw-shard",
1379
+ "nbytes": 24780800,
1380
+ "records": [
1381
+ {
1382
+ "name": "model.layers.19.mlp.gate_up_proj.q_weight",
1383
+ "shape": [
1384
+ 11264,
1385
+ 256
1386
+ ],
1387
+ "dtype": "uint32",
1388
+ "format": "f32-to-bf16",
1389
+ "nbytes": 11534336,
1390
+ "byteOffset": 0
1391
+ },
1392
+ {
1393
+ "name": "model.layers.19.mlp.gate_up_proj.q_scale",
1394
+ "shape": [
1395
+ 11264,
1396
+ 64
1397
+ ],
1398
+ "dtype": "bfloat16",
1399
+ "format": "raw",
1400
+ "nbytes": 1441792,
1401
+ "byteOffset": 11534336
1402
+ },
1403
+ {
1404
+ "name": "model.layers.19.post_attention_layernorm.weight",
1405
+ "shape": [
1406
+ 2048
1407
+ ],
1408
+ "dtype": "bfloat16",
1409
+ "format": "raw",
1410
+ "nbytes": 4096,
1411
+ "byteOffset": 12976128
1412
+ },
1413
+ {
1414
+ "name": "model.layers.19.self_attn.qkv_proj.q_weight",
1415
+ "shape": [
1416
+ 2560,
1417
+ 256
1418
+ ],
1419
+ "dtype": "uint32",
1420
+ "format": "f32-to-bf16",
1421
+ "nbytes": 2621440,
1422
+ "byteOffset": 12980224
1423
+ },
1424
+ {
1425
+ "name": "model.layers.19.self_attn.qkv_proj.q_scale",
1426
+ "shape": [
1427
+ 2560,
1428
+ 64
1429
+ ],
1430
+ "dtype": "bfloat16",
1431
+ "format": "raw",
1432
+ "nbytes": 327680,
1433
+ "byteOffset": 15601664
1434
+ },
1435
+ {
1436
+ "name": "model.layers.19.self_attn.o_proj.q_weight",
1437
+ "shape": [
1438
+ 2048,
1439
+ 256
1440
+ ],
1441
+ "dtype": "uint32",
1442
+ "format": "f32-to-bf16",
1443
+ "nbytes": 2097152,
1444
+ "byteOffset": 15929344
1445
+ },
1446
+ {
1447
+ "name": "model.layers.19.self_attn.o_proj.q_scale",
1448
+ "shape": [
1449
+ 2048,
1450
+ 64
1451
+ ],
1452
+ "dtype": "bfloat16",
1453
+ "format": "raw",
1454
+ "nbytes": 262144,
1455
+ "byteOffset": 18026496
1456
+ },
1457
+ {
1458
+ "name": "model.layers.2.input_layernorm.weight",
1459
+ "shape": [
1460
+ 2048
1461
+ ],
1462
+ "dtype": "bfloat16",
1463
+ "format": "raw",
1464
+ "nbytes": 4096,
1465
+ "byteOffset": 18288640
1466
+ },
1467
+ {
1468
+ "name": "model.layers.2.mlp.down_proj.q_weight",
1469
+ "shape": [
1470
+ 2048,
1471
+ 704
1472
+ ],
1473
+ "dtype": "uint32",
1474
+ "format": "f32-to-bf16",
1475
+ "nbytes": 5767168,
1476
+ "byteOffset": 18292736
1477
+ },
1478
+ {
1479
+ "name": "model.layers.2.mlp.down_proj.q_scale",
1480
+ "shape": [
1481
+ 2048,
1482
+ 176
1483
+ ],
1484
+ "dtype": "bfloat16",
1485
+ "format": "raw",
1486
+ "nbytes": 720896,
1487
+ "byteOffset": 24059904
1488
+ }
1489
+ ],
1490
+ "md5sum": "35244475eb557462112a7fa2d3db4dc9"
1491
+ },
1492
+ {
1493
+ "dataPath": "params_shard_14.bin",
1494
+ "format": "raw-shard",
1495
+ "nbytes": 24780800,
1496
+ "records": [
1497
+ {
1498
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
1499
+ "shape": [
1500
+ 11264,
1501
+ 256
1502
+ ],
1503
+ "dtype": "uint32",
1504
+ "format": "f32-to-bf16",
1505
+ "nbytes": 11534336,
1506
+ "byteOffset": 0
1507
+ },
1508
+ {
1509
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
1510
+ "shape": [
1511
+ 11264,
1512
+ 64
1513
+ ],
1514
+ "dtype": "bfloat16",
1515
+ "format": "raw",
1516
+ "nbytes": 1441792,
1517
+ "byteOffset": 11534336
1518
+ },
1519
+ {
1520
+ "name": "model.layers.2.post_attention_layernorm.weight",
1521
+ "shape": [
1522
+ 2048
1523
+ ],
1524
+ "dtype": "bfloat16",
1525
+ "format": "raw",
1526
+ "nbytes": 4096,
1527
+ "byteOffset": 12976128
1528
+ },
1529
+ {
1530
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
1531
+ "shape": [
1532
+ 2560,
1533
+ 256
1534
+ ],
1535
+ "dtype": "uint32",
1536
+ "format": "f32-to-bf16",
1537
+ "nbytes": 2621440,
1538
+ "byteOffset": 12980224
1539
+ },
1540
+ {
1541
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
1542
+ "shape": [
1543
+ 2560,
1544
+ 64
1545
+ ],
1546
+ "dtype": "bfloat16",
1547
+ "format": "raw",
1548
+ "nbytes": 327680,
1549
+ "byteOffset": 15601664
1550
+ },
1551
+ {
1552
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
1553
+ "shape": [
1554
+ 2048,
1555
+ 256
1556
+ ],
1557
+ "dtype": "uint32",
1558
+ "format": "f32-to-bf16",
1559
+ "nbytes": 2097152,
1560
+ "byteOffset": 15929344
1561
+ },
1562
+ {
1563
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
1564
+ "shape": [
1565
+ 2048,
1566
+ 64
1567
+ ],
1568
+ "dtype": "bfloat16",
1569
+ "format": "raw",
1570
+ "nbytes": 262144,
1571
+ "byteOffset": 18026496
1572
+ },
1573
+ {
1574
+ "name": "model.layers.20.input_layernorm.weight",
1575
+ "shape": [
1576
+ 2048
1577
+ ],
1578
+ "dtype": "bfloat16",
1579
+ "format": "raw",
1580
+ "nbytes": 4096,
1581
+ "byteOffset": 18288640
1582
+ },
1583
+ {
1584
+ "name": "model.layers.20.mlp.down_proj.q_weight",
1585
+ "shape": [
1586
+ 2048,
1587
+ 704
1588
+ ],
1589
+ "dtype": "uint32",
1590
+ "format": "f32-to-bf16",
1591
+ "nbytes": 5767168,
1592
+ "byteOffset": 18292736
1593
+ },
1594
+ {
1595
+ "name": "model.layers.20.mlp.down_proj.q_scale",
1596
+ "shape": [
1597
+ 2048,
1598
+ 176
1599
+ ],
1600
+ "dtype": "bfloat16",
1601
+ "format": "raw",
1602
+ "nbytes": 720896,
1603
+ "byteOffset": 24059904
1604
+ }
1605
+ ],
1606
+ "md5sum": "f87ee6617b7866df37b8c66980d7144f"
1607
+ },
1608
+ {
1609
+ "dataPath": "params_shard_15.bin",
1610
+ "format": "raw-shard",
1611
+ "nbytes": 24780800,
1612
+ "records": [
1613
+ {
1614
+ "name": "model.layers.20.mlp.gate_up_proj.q_weight",
1615
+ "shape": [
1616
+ 11264,
1617
+ 256
1618
+ ],
1619
+ "dtype": "uint32",
1620
+ "format": "f32-to-bf16",
1621
+ "nbytes": 11534336,
1622
+ "byteOffset": 0
1623
+ },
1624
+ {
1625
+ "name": "model.layers.20.mlp.gate_up_proj.q_scale",
1626
+ "shape": [
1627
+ 11264,
1628
+ 64
1629
+ ],
1630
+ "dtype": "bfloat16",
1631
+ "format": "raw",
1632
+ "nbytes": 1441792,
1633
+ "byteOffset": 11534336
1634
+ },
1635
+ {
1636
+ "name": "model.layers.20.post_attention_layernorm.weight",
1637
+ "shape": [
1638
+ 2048
1639
+ ],
1640
+ "dtype": "bfloat16",
1641
+ "format": "raw",
1642
+ "nbytes": 4096,
1643
+ "byteOffset": 12976128
1644
+ },
1645
+ {
1646
+ "name": "model.layers.20.self_attn.qkv_proj.q_weight",
1647
+ "shape": [
1648
+ 2560,
1649
+ 256
1650
+ ],
1651
+ "dtype": "uint32",
1652
+ "format": "f32-to-bf16",
1653
+ "nbytes": 2621440,
1654
+ "byteOffset": 12980224
1655
+ },
1656
+ {
1657
+ "name": "model.layers.20.self_attn.qkv_proj.q_scale",
1658
+ "shape": [
1659
+ 2560,
1660
+ 64
1661
+ ],
1662
+ "dtype": "bfloat16",
1663
+ "format": "raw",
1664
+ "nbytes": 327680,
1665
+ "byteOffset": 15601664
1666
+ },
1667
+ {
1668
+ "name": "model.layers.20.self_attn.o_proj.q_weight",
1669
+ "shape": [
1670
+ 2048,
1671
+ 256
1672
+ ],
1673
+ "dtype": "uint32",
1674
+ "format": "f32-to-bf16",
1675
+ "nbytes": 2097152,
1676
+ "byteOffset": 15929344
1677
+ },
1678
+ {
1679
+ "name": "model.layers.20.self_attn.o_proj.q_scale",
1680
+ "shape": [
1681
+ 2048,
1682
+ 64
1683
+ ],
1684
+ "dtype": "bfloat16",
1685
+ "format": "raw",
1686
+ "nbytes": 262144,
1687
+ "byteOffset": 18026496
1688
+ },
1689
+ {
1690
+ "name": "model.layers.21.input_layernorm.weight",
1691
+ "shape": [
1692
+ 2048
1693
+ ],
1694
+ "dtype": "bfloat16",
1695
+ "format": "raw",
1696
+ "nbytes": 4096,
1697
+ "byteOffset": 18288640
1698
+ },
1699
+ {
1700
+ "name": "model.layers.21.mlp.down_proj.q_weight",
1701
+ "shape": [
1702
+ 2048,
1703
+ 704
1704
+ ],
1705
+ "dtype": "uint32",
1706
+ "format": "f32-to-bf16",
1707
+ "nbytes": 5767168,
1708
+ "byteOffset": 18292736
1709
+ },
1710
+ {
1711
+ "name": "model.layers.21.mlp.down_proj.q_scale",
1712
+ "shape": [
1713
+ 2048,
1714
+ 176
1715
+ ],
1716
+ "dtype": "bfloat16",
1717
+ "format": "raw",
1718
+ "nbytes": 720896,
1719
+ "byteOffset": 24059904
1720
+ }
1721
+ ],
1722
+ "md5sum": "b1864fe4abf3244367e8fedd502eb8ca"
1723
+ },
1724
+ {
1725
+ "dataPath": "params_shard_16.bin",
1726
+ "format": "raw-shard",
1727
+ "nbytes": 24780800,
1728
+ "records": [
1729
+ {
1730
+ "name": "model.layers.21.mlp.gate_up_proj.q_weight",
1731
+ "shape": [
1732
+ 11264,
1733
+ 256
1734
+ ],
1735
+ "dtype": "uint32",
1736
+ "format": "f32-to-bf16",
1737
+ "nbytes": 11534336,
1738
+ "byteOffset": 0
1739
+ },
1740
+ {
1741
+ "name": "model.layers.21.mlp.gate_up_proj.q_scale",
1742
+ "shape": [
1743
+ 11264,
1744
+ 64
1745
+ ],
1746
+ "dtype": "bfloat16",
1747
+ "format": "raw",
1748
+ "nbytes": 1441792,
1749
+ "byteOffset": 11534336
1750
+ },
1751
+ {
1752
+ "name": "model.layers.21.post_attention_layernorm.weight",
1753
+ "shape": [
1754
+ 2048
1755
+ ],
1756
+ "dtype": "bfloat16",
1757
+ "format": "raw",
1758
+ "nbytes": 4096,
1759
+ "byteOffset": 12976128
1760
+ },
1761
+ {
1762
+ "name": "model.layers.21.self_attn.qkv_proj.q_weight",
1763
+ "shape": [
1764
+ 2560,
1765
+ 256
1766
+ ],
1767
+ "dtype": "uint32",
1768
+ "format": "f32-to-bf16",
1769
+ "nbytes": 2621440,
1770
+ "byteOffset": 12980224
1771
+ },
1772
+ {
1773
+ "name": "model.layers.21.self_attn.qkv_proj.q_scale",
1774
+ "shape": [
1775
+ 2560,
1776
+ 64
1777
+ ],
1778
+ "dtype": "bfloat16",
1779
+ "format": "raw",
1780
+ "nbytes": 327680,
1781
+ "byteOffset": 15601664
1782
+ },
1783
+ {
1784
+ "name": "model.layers.21.self_attn.o_proj.q_weight",
1785
+ "shape": [
1786
+ 2048,
1787
+ 256
1788
+ ],
1789
+ "dtype": "uint32",
1790
+ "format": "f32-to-bf16",
1791
+ "nbytes": 2097152,
1792
+ "byteOffset": 15929344
1793
+ },
1794
+ {
1795
+ "name": "model.layers.21.self_attn.o_proj.q_scale",
1796
+ "shape": [
1797
+ 2048,
1798
+ 64
1799
+ ],
1800
+ "dtype": "bfloat16",
1801
+ "format": "raw",
1802
+ "nbytes": 262144,
1803
+ "byteOffset": 18026496
1804
+ },
1805
+ {
1806
+ "name": "model.layers.3.input_layernorm.weight",
1807
+ "shape": [
1808
+ 2048
1809
+ ],
1810
+ "dtype": "bfloat16",
1811
+ "format": "raw",
1812
+ "nbytes": 4096,
1813
+ "byteOffset": 18288640
1814
+ },
1815
+ {
1816
+ "name": "model.layers.3.mlp.down_proj.q_weight",
1817
+ "shape": [
1818
+ 2048,
1819
+ 704
1820
+ ],
1821
+ "dtype": "uint32",
1822
+ "format": "f32-to-bf16",
1823
+ "nbytes": 5767168,
1824
+ "byteOffset": 18292736
1825
+ },
1826
+ {
1827
+ "name": "model.layers.3.mlp.down_proj.q_scale",
1828
+ "shape": [
1829
+ 2048,
1830
+ 176
1831
+ ],
1832
+ "dtype": "bfloat16",
1833
+ "format": "raw",
1834
+ "nbytes": 720896,
1835
+ "byteOffset": 24059904
1836
+ }
1837
+ ],
1838
+ "md5sum": "505cab1c1b2228b83fd2c1d2240f0959"
1839
+ },
1840
+ {
1841
+ "dataPath": "params_shard_17.bin",
1842
+ "format": "raw-shard",
1843
+ "nbytes": 24780800,
1844
+ "records": [
1845
+ {
1846
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
1847
+ "shape": [
1848
+ 11264,
1849
+ 256
1850
+ ],
1851
+ "dtype": "uint32",
1852
+ "format": "f32-to-bf16",
1853
+ "nbytes": 11534336,
1854
+ "byteOffset": 0
1855
+ },
1856
+ {
1857
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
1858
+ "shape": [
1859
+ 11264,
1860
+ 64
1861
+ ],
1862
+ "dtype": "bfloat16",
1863
+ "format": "raw",
1864
+ "nbytes": 1441792,
1865
+ "byteOffset": 11534336
1866
+ },
1867
+ {
1868
+ "name": "model.layers.3.post_attention_layernorm.weight",
1869
+ "shape": [
1870
+ 2048
1871
+ ],
1872
+ "dtype": "bfloat16",
1873
+ "format": "raw",
1874
+ "nbytes": 4096,
1875
+ "byteOffset": 12976128
1876
+ },
1877
+ {
1878
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
1879
+ "shape": [
1880
+ 2560,
1881
+ 256
1882
+ ],
1883
+ "dtype": "uint32",
1884
+ "format": "f32-to-bf16",
1885
+ "nbytes": 2621440,
1886
+ "byteOffset": 12980224
1887
+ },
1888
+ {
1889
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
1890
+ "shape": [
1891
+ 2560,
1892
+ 64
1893
+ ],
1894
+ "dtype": "bfloat16",
1895
+ "format": "raw",
1896
+ "nbytes": 327680,
1897
+ "byteOffset": 15601664
1898
+ },
1899
+ {
1900
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
1901
+ "shape": [
1902
+ 2048,
1903
+ 256
1904
+ ],
1905
+ "dtype": "uint32",
1906
+ "format": "f32-to-bf16",
1907
+ "nbytes": 2097152,
1908
+ "byteOffset": 15929344
1909
+ },
1910
+ {
1911
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
1912
+ "shape": [
1913
+ 2048,
1914
+ 64
1915
+ ],
1916
+ "dtype": "bfloat16",
1917
+ "format": "raw",
1918
+ "nbytes": 262144,
1919
+ "byteOffset": 18026496
1920
+ },
1921
+ {
1922
+ "name": "model.layers.4.input_layernorm.weight",
1923
+ "shape": [
1924
+ 2048
1925
+ ],
1926
+ "dtype": "bfloat16",
1927
+ "format": "raw",
1928
+ "nbytes": 4096,
1929
+ "byteOffset": 18288640
1930
+ },
1931
+ {
1932
+ "name": "model.layers.4.mlp.down_proj.q_weight",
1933
+ "shape": [
1934
+ 2048,
1935
+ 704
1936
+ ],
1937
+ "dtype": "uint32",
1938
+ "format": "f32-to-bf16",
1939
+ "nbytes": 5767168,
1940
+ "byteOffset": 18292736
1941
+ },
1942
+ {
1943
+ "name": "model.layers.4.mlp.down_proj.q_scale",
1944
+ "shape": [
1945
+ 2048,
1946
+ 176
1947
+ ],
1948
+ "dtype": "bfloat16",
1949
+ "format": "raw",
1950
+ "nbytes": 720896,
1951
+ "byteOffset": 24059904
1952
+ }
1953
+ ],
1954
+ "md5sum": "a798beddbb6ccf6e8b76b51ae087c28e"
1955
+ },
1956
+ {
1957
+ "dataPath": "params_shard_18.bin",
1958
+ "format": "raw-shard",
1959
+ "nbytes": 24780800,
1960
+ "records": [
1961
+ {
1962
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
1963
+ "shape": [
1964
+ 11264,
1965
+ 256
1966
+ ],
1967
+ "dtype": "uint32",
1968
+ "format": "f32-to-bf16",
1969
+ "nbytes": 11534336,
1970
+ "byteOffset": 0
1971
+ },
1972
+ {
1973
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
1974
+ "shape": [
1975
+ 11264,
1976
+ 64
1977
+ ],
1978
+ "dtype": "bfloat16",
1979
+ "format": "raw",
1980
+ "nbytes": 1441792,
1981
+ "byteOffset": 11534336
1982
+ },
1983
+ {
1984
+ "name": "model.layers.4.post_attention_layernorm.weight",
1985
+ "shape": [
1986
+ 2048
1987
+ ],
1988
+ "dtype": "bfloat16",
1989
+ "format": "raw",
1990
+ "nbytes": 4096,
1991
+ "byteOffset": 12976128
1992
+ },
1993
+ {
1994
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
1995
+ "shape": [
1996
+ 2560,
1997
+ 256
1998
+ ],
1999
+ "dtype": "uint32",
2000
+ "format": "f32-to-bf16",
2001
+ "nbytes": 2621440,
2002
+ "byteOffset": 12980224
2003
+ },
2004
+ {
2005
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
2006
+ "shape": [
2007
+ 2560,
2008
+ 64
2009
+ ],
2010
+ "dtype": "bfloat16",
2011
+ "format": "raw",
2012
+ "nbytes": 327680,
2013
+ "byteOffset": 15601664
2014
+ },
2015
+ {
2016
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
2017
+ "shape": [
2018
+ 2048,
2019
+ 256
2020
+ ],
2021
+ "dtype": "uint32",
2022
+ "format": "f32-to-bf16",
2023
+ "nbytes": 2097152,
2024
+ "byteOffset": 15929344
2025
+ },
2026
+ {
2027
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
2028
+ "shape": [
2029
+ 2048,
2030
+ 64
2031
+ ],
2032
+ "dtype": "bfloat16",
2033
+ "format": "raw",
2034
+ "nbytes": 262144,
2035
+ "byteOffset": 18026496
2036
+ },
2037
+ {
2038
+ "name": "model.layers.5.input_layernorm.weight",
2039
+ "shape": [
2040
+ 2048
2041
+ ],
2042
+ "dtype": "bfloat16",
2043
+ "format": "raw",
2044
+ "nbytes": 4096,
2045
+ "byteOffset": 18288640
2046
+ },
2047
+ {
2048
+ "name": "model.layers.5.mlp.down_proj.q_weight",
2049
+ "shape": [
2050
+ 2048,
2051
+ 704
2052
+ ],
2053
+ "dtype": "uint32",
2054
+ "format": "f32-to-bf16",
2055
+ "nbytes": 5767168,
2056
+ "byteOffset": 18292736
2057
+ },
2058
+ {
2059
+ "name": "model.layers.5.mlp.down_proj.q_scale",
2060
+ "shape": [
2061
+ 2048,
2062
+ 176
2063
+ ],
2064
+ "dtype": "bfloat16",
2065
+ "format": "raw",
2066
+ "nbytes": 720896,
2067
+ "byteOffset": 24059904
2068
+ }
2069
+ ],
2070
+ "md5sum": "4fc462bba2bb7ed3004b13350e52ce3f"
2071
+ },
2072
+ {
2073
+ "dataPath": "params_shard_19.bin",
2074
+ "format": "raw-shard",
2075
+ "nbytes": 24780800,
2076
+ "records": [
2077
+ {
2078
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
2079
+ "shape": [
2080
+ 11264,
2081
+ 256
2082
+ ],
2083
+ "dtype": "uint32",
2084
+ "format": "f32-to-bf16",
2085
+ "nbytes": 11534336,
2086
+ "byteOffset": 0
2087
+ },
2088
+ {
2089
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
2090
+ "shape": [
2091
+ 11264,
2092
+ 64
2093
+ ],
2094
+ "dtype": "bfloat16",
2095
+ "format": "raw",
2096
+ "nbytes": 1441792,
2097
+ "byteOffset": 11534336
2098
+ },
2099
+ {
2100
+ "name": "model.layers.5.post_attention_layernorm.weight",
2101
+ "shape": [
2102
+ 2048
2103
+ ],
2104
+ "dtype": "bfloat16",
2105
+ "format": "raw",
2106
+ "nbytes": 4096,
2107
+ "byteOffset": 12976128
2108
+ },
2109
+ {
2110
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
2111
+ "shape": [
2112
+ 2560,
2113
+ 256
2114
+ ],
2115
+ "dtype": "uint32",
2116
+ "format": "f32-to-bf16",
2117
+ "nbytes": 2621440,
2118
+ "byteOffset": 12980224
2119
+ },
2120
+ {
2121
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
2122
+ "shape": [
2123
+ 2560,
2124
+ 64
2125
+ ],
2126
+ "dtype": "bfloat16",
2127
+ "format": "raw",
2128
+ "nbytes": 327680,
2129
+ "byteOffset": 15601664
2130
+ },
2131
+ {
2132
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
2133
+ "shape": [
2134
+ 2048,
2135
+ 256
2136
+ ],
2137
+ "dtype": "uint32",
2138
+ "format": "f32-to-bf16",
2139
+ "nbytes": 2097152,
2140
+ "byteOffset": 15929344
2141
+ },
2142
+ {
2143
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
2144
+ "shape": [
2145
+ 2048,
2146
+ 64
2147
+ ],
2148
+ "dtype": "bfloat16",
2149
+ "format": "raw",
2150
+ "nbytes": 262144,
2151
+ "byteOffset": 18026496
2152
+ },
2153
+ {
2154
+ "name": "model.layers.6.input_layernorm.weight",
2155
+ "shape": [
2156
+ 2048
2157
+ ],
2158
+ "dtype": "bfloat16",
2159
+ "format": "raw",
2160
+ "nbytes": 4096,
2161
+ "byteOffset": 18288640
2162
+ },
2163
+ {
2164
+ "name": "model.layers.6.mlp.down_proj.q_weight",
2165
+ "shape": [
2166
+ 2048,
2167
+ 704
2168
+ ],
2169
+ "dtype": "uint32",
2170
+ "format": "f32-to-bf16",
2171
+ "nbytes": 5767168,
2172
+ "byteOffset": 18292736
2173
+ },
2174
+ {
2175
+ "name": "model.layers.6.mlp.down_proj.q_scale",
2176
+ "shape": [
2177
+ 2048,
2178
+ 176
2179
+ ],
2180
+ "dtype": "bfloat16",
2181
+ "format": "raw",
2182
+ "nbytes": 720896,
2183
+ "byteOffset": 24059904
2184
+ }
2185
+ ],
2186
+ "md5sum": "6f0b3e5f9da341272077d653a0983dfe"
2187
+ },
2188
+ {
2189
+ "dataPath": "params_shard_20.bin",
2190
+ "format": "raw-shard",
2191
+ "nbytes": 24780800,
2192
+ "records": [
2193
+ {
2194
+ "name": "model.layers.6.mlp.gate_up_proj.q_weight",
2195
+ "shape": [
2196
+ 11264,
2197
+ 256
2198
+ ],
2199
+ "dtype": "uint32",
2200
+ "format": "f32-to-bf16",
2201
+ "nbytes": 11534336,
2202
+ "byteOffset": 0
2203
+ },
2204
+ {
2205
+ "name": "model.layers.6.mlp.gate_up_proj.q_scale",
2206
+ "shape": [
2207
+ 11264,
2208
+ 64
2209
+ ],
2210
+ "dtype": "bfloat16",
2211
+ "format": "raw",
2212
+ "nbytes": 1441792,
2213
+ "byteOffset": 11534336
2214
+ },
2215
+ {
2216
+ "name": "model.layers.6.post_attention_layernorm.weight",
2217
+ "shape": [
2218
+ 2048
2219
+ ],
2220
+ "dtype": "bfloat16",
2221
+ "format": "raw",
2222
+ "nbytes": 4096,
2223
+ "byteOffset": 12976128
2224
+ },
2225
+ {
2226
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
2227
+ "shape": [
2228
+ 2560,
2229
+ 256
2230
+ ],
2231
+ "dtype": "uint32",
2232
+ "format": "f32-to-bf16",
2233
+ "nbytes": 2621440,
2234
+ "byteOffset": 12980224
2235
+ },
2236
+ {
2237
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
2238
+ "shape": [
2239
+ 2560,
2240
+ 64
2241
+ ],
2242
+ "dtype": "bfloat16",
2243
+ "format": "raw",
2244
+ "nbytes": 327680,
2245
+ "byteOffset": 15601664
2246
+ },
2247
+ {
2248
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
2249
+ "shape": [
2250
+ 2048,
2251
+ 256
2252
+ ],
2253
+ "dtype": "uint32",
2254
+ "format": "f32-to-bf16",
2255
+ "nbytes": 2097152,
2256
+ "byteOffset": 15929344
2257
+ },
2258
+ {
2259
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
2260
+ "shape": [
2261
+ 2048,
2262
+ 64
2263
+ ],
2264
+ "dtype": "bfloat16",
2265
+ "format": "raw",
2266
+ "nbytes": 262144,
2267
+ "byteOffset": 18026496
2268
+ },
2269
+ {
2270
+ "name": "model.layers.7.input_layernorm.weight",
2271
+ "shape": [
2272
+ 2048
2273
+ ],
2274
+ "dtype": "bfloat16",
2275
+ "format": "raw",
2276
+ "nbytes": 4096,
2277
+ "byteOffset": 18288640
2278
+ },
2279
+ {
2280
+ "name": "model.layers.7.mlp.down_proj.q_weight",
2281
+ "shape": [
2282
+ 2048,
2283
+ 704
2284
+ ],
2285
+ "dtype": "uint32",
2286
+ "format": "f32-to-bf16",
2287
+ "nbytes": 5767168,
2288
+ "byteOffset": 18292736
2289
+ },
2290
+ {
2291
+ "name": "model.layers.7.mlp.down_proj.q_scale",
2292
+ "shape": [
2293
+ 2048,
2294
+ 176
2295
+ ],
2296
+ "dtype": "bfloat16",
2297
+ "format": "raw",
2298
+ "nbytes": 720896,
2299
+ "byteOffset": 24059904
2300
+ }
2301
+ ],
2302
+ "md5sum": "686b2f5cffad40c9ede9515e7d23237e"
2303
+ },
2304
+ {
2305
+ "dataPath": "params_shard_21.bin",
2306
+ "format": "raw-shard",
2307
+ "nbytes": 24780800,
2308
+ "records": [
2309
+ {
2310
+ "name": "model.layers.7.mlp.gate_up_proj.q_weight",
2311
+ "shape": [
2312
+ 11264,
2313
+ 256
2314
+ ],
2315
+ "dtype": "uint32",
2316
+ "format": "f32-to-bf16",
2317
+ "nbytes": 11534336,
2318
+ "byteOffset": 0
2319
+ },
2320
+ {
2321
+ "name": "model.layers.7.mlp.gate_up_proj.q_scale",
2322
+ "shape": [
2323
+ 11264,
2324
+ 64
2325
+ ],
2326
+ "dtype": "bfloat16",
2327
+ "format": "raw",
2328
+ "nbytes": 1441792,
2329
+ "byteOffset": 11534336
2330
+ },
2331
+ {
2332
+ "name": "model.layers.7.post_attention_layernorm.weight",
2333
+ "shape": [
2334
+ 2048
2335
+ ],
2336
+ "dtype": "bfloat16",
2337
+ "format": "raw",
2338
+ "nbytes": 4096,
2339
+ "byteOffset": 12976128
2340
+ },
2341
+ {
2342
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
2343
+ "shape": [
2344
+ 2560,
2345
+ 256
2346
+ ],
2347
+ "dtype": "uint32",
2348
+ "format": "f32-to-bf16",
2349
+ "nbytes": 2621440,
2350
+ "byteOffset": 12980224
2351
+ },
2352
+ {
2353
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
2354
+ "shape": [
2355
+ 2560,
2356
+ 64
2357
+ ],
2358
+ "dtype": "bfloat16",
2359
+ "format": "raw",
2360
+ "nbytes": 327680,
2361
+ "byteOffset": 15601664
2362
+ },
2363
+ {
2364
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
2365
+ "shape": [
2366
+ 2048,
2367
+ 256
2368
+ ],
2369
+ "dtype": "uint32",
2370
+ "format": "f32-to-bf16",
2371
+ "nbytes": 2097152,
2372
+ "byteOffset": 15929344
2373
+ },
2374
+ {
2375
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
2376
+ "shape": [
2377
+ 2048,
2378
+ 64
2379
+ ],
2380
+ "dtype": "bfloat16",
2381
+ "format": "raw",
2382
+ "nbytes": 262144,
2383
+ "byteOffset": 18026496
2384
+ },
2385
+ {
2386
+ "name": "model.layers.8.input_layernorm.weight",
2387
+ "shape": [
2388
+ 2048
2389
+ ],
2390
+ "dtype": "bfloat16",
2391
+ "format": "raw",
2392
+ "nbytes": 4096,
2393
+ "byteOffset": 18288640
2394
+ },
2395
+ {
2396
+ "name": "model.layers.8.mlp.down_proj.q_weight",
2397
+ "shape": [
2398
+ 2048,
2399
+ 704
2400
+ ],
2401
+ "dtype": "uint32",
2402
+ "format": "f32-to-bf16",
2403
+ "nbytes": 5767168,
2404
+ "byteOffset": 18292736
2405
+ },
2406
+ {
2407
+ "name": "model.layers.8.mlp.down_proj.q_scale",
2408
+ "shape": [
2409
+ 2048,
2410
+ 176
2411
+ ],
2412
+ "dtype": "bfloat16",
2413
+ "format": "raw",
2414
+ "nbytes": 720896,
2415
+ "byteOffset": 24059904
2416
+ }
2417
+ ],
2418
+ "md5sum": "686faea393d9673351ea835a31d58d1c"
2419
+ },
2420
+ {
2421
+ "dataPath": "params_shard_22.bin",
2422
+ "format": "raw-shard",
2423
+ "nbytes": 24780800,
2424
+ "records": [
2425
+ {
2426
+ "name": "model.layers.8.mlp.gate_up_proj.q_weight",
2427
+ "shape": [
2428
+ 11264,
2429
+ 256
2430
+ ],
2431
+ "dtype": "uint32",
2432
+ "format": "f32-to-bf16",
2433
+ "nbytes": 11534336,
2434
+ "byteOffset": 0
2435
+ },
2436
+ {
2437
+ "name": "model.layers.8.mlp.gate_up_proj.q_scale",
2438
+ "shape": [
2439
+ 11264,
2440
+ 64
2441
+ ],
2442
+ "dtype": "bfloat16",
2443
+ "format": "raw",
2444
+ "nbytes": 1441792,
2445
+ "byteOffset": 11534336
2446
+ },
2447
+ {
2448
+ "name": "model.layers.8.post_attention_layernorm.weight",
2449
+ "shape": [
2450
+ 2048
2451
+ ],
2452
+ "dtype": "bfloat16",
2453
+ "format": "raw",
2454
+ "nbytes": 4096,
2455
+ "byteOffset": 12976128
2456
+ },
2457
+ {
2458
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
2459
+ "shape": [
2460
+ 2560,
2461
+ 256
2462
+ ],
2463
+ "dtype": "uint32",
2464
+ "format": "f32-to-bf16",
2465
+ "nbytes": 2621440,
2466
+ "byteOffset": 12980224
2467
+ },
2468
+ {
2469
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
2470
+ "shape": [
2471
+ 2560,
2472
+ 64
2473
+ ],
2474
+ "dtype": "bfloat16",
2475
+ "format": "raw",
2476
+ "nbytes": 327680,
2477
+ "byteOffset": 15601664
2478
+ },
2479
+ {
2480
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
2481
+ "shape": [
2482
+ 2048,
2483
+ 256
2484
+ ],
2485
+ "dtype": "uint32",
2486
+ "format": "f32-to-bf16",
2487
+ "nbytes": 2097152,
2488
+ "byteOffset": 15929344
2489
+ },
2490
+ {
2491
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
2492
+ "shape": [
2493
+ 2048,
2494
+ 64
2495
+ ],
2496
+ "dtype": "bfloat16",
2497
+ "format": "raw",
2498
+ "nbytes": 262144,
2499
+ "byteOffset": 18026496
2500
+ },
2501
+ {
2502
+ "name": "model.layers.9.input_layernorm.weight",
2503
+ "shape": [
2504
+ 2048
2505
+ ],
2506
+ "dtype": "bfloat16",
2507
+ "format": "raw",
2508
+ "nbytes": 4096,
2509
+ "byteOffset": 18288640
2510
+ },
2511
+ {
2512
+ "name": "model.layers.9.mlp.down_proj.q_weight",
2513
+ "shape": [
2514
+ 2048,
2515
+ 704
2516
+ ],
2517
+ "dtype": "uint32",
2518
+ "format": "f32-to-bf16",
2519
+ "nbytes": 5767168,
2520
+ "byteOffset": 18292736
2521
+ },
2522
+ {
2523
+ "name": "model.layers.9.mlp.down_proj.q_scale",
2524
+ "shape": [
2525
+ 2048,
2526
+ 176
2527
+ ],
2528
+ "dtype": "bfloat16",
2529
+ "format": "raw",
2530
+ "nbytes": 720896,
2531
+ "byteOffset": 24059904
2532
+ }
2533
+ ],
2534
+ "md5sum": "5a11a5b68f457af4ae8df8926c2c02ff"
2535
+ },
2536
+ {
2537
+ "dataPath": "params_shard_23.bin",
2538
+ "format": "raw-shard",
2539
+ "nbytes": 18292736,
2540
+ "records": [
2541
+ {
2542
+ "name": "model.layers.9.mlp.gate_up_proj.q_weight",
2543
+ "shape": [
2544
+ 11264,
2545
+ 256
2546
+ ],
2547
+ "dtype": "uint32",
2548
+ "format": "f32-to-bf16",
2549
+ "nbytes": 11534336,
2550
+ "byteOffset": 0
2551
+ },
2552
+ {
2553
+ "name": "model.layers.9.mlp.gate_up_proj.q_scale",
2554
+ "shape": [
2555
+ 11264,
2556
+ 64
2557
+ ],
2558
+ "dtype": "bfloat16",
2559
+ "format": "raw",
2560
+ "nbytes": 1441792,
2561
+ "byteOffset": 11534336
2562
+ },
2563
+ {
2564
+ "name": "model.layers.9.post_attention_layernorm.weight",
2565
+ "shape": [
2566
+ 2048
2567
+ ],
2568
+ "dtype": "bfloat16",
2569
+ "format": "raw",
2570
+ "nbytes": 4096,
2571
+ "byteOffset": 12976128
2572
+ },
2573
+ {
2574
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
2575
+ "shape": [
2576
+ 2560,
2577
+ 256
2578
+ ],
2579
+ "dtype": "uint32",
2580
+ "format": "f32-to-bf16",
2581
+ "nbytes": 2621440,
2582
+ "byteOffset": 12980224
2583
+ },
2584
+ {
2585
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
2586
+ "shape": [
2587
+ 2560,
2588
+ 64
2589
+ ],
2590
+ "dtype": "bfloat16",
2591
+ "format": "raw",
2592
+ "nbytes": 327680,
2593
+ "byteOffset": 15601664
2594
+ },
2595
+ {
2596
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
2597
+ "shape": [
2598
+ 2048,
2599
+ 256
2600
+ ],
2601
+ "dtype": "uint32",
2602
+ "format": "f32-to-bf16",
2603
+ "nbytes": 2097152,
2604
+ "byteOffset": 15929344
2605
+ },
2606
+ {
2607
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
2608
+ "shape": [
2609
+ 2048,
2610
+ 64
2611
+ ],
2612
+ "dtype": "bfloat16",
2613
+ "format": "raw",
2614
+ "nbytes": 262144,
2615
+ "byteOffset": 18026496
2616
+ },
2617
+ {
2618
+ "name": "model.norm.weight",
2619
+ "shape": [
2620
+ 2048
2621
+ ],
2622
+ "dtype": "bfloat16",
2623
+ "format": "raw",
2624
+ "nbytes": 4096,
2625
+ "byteOffset": 18288640
2626
+ }
2627
+ ],
2628
+ "md5sum": "83f96e3340c8f435412897a17003888e"
2629
+ }
2630
+ ]
2631
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2631 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 225,
4
+ "ParamBytes": 687841280.0,
5
+ "BitsPerParam": 5.0022620095953885
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 32768000,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.q_weight",
15
+ "shape": [
16
+ 32000,
17
+ 256
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 32768000,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "6acf42e9e3428cda2d937cedfbc2bdbe"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 32768000,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.q_weight",
34
+ "shape": [
35
+ 32000,
36
+ 256
37
+ ],
38
+ "dtype": "uint32",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 32768000,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "5a4dc43f3da7ea1f953c0c460796a420"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 32976896,
50
+ "records": [
51
+ {
52
+ "name": "lm_head.q_scale",
53
+ "shape": [
54
+ 32000,
55
+ 64
56
+ ],
57
+ "dtype": "float32",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 4096000,
60
+ "byteOffset": 0
61
+ },
62
+ {
63
+ "name": "model.embed_tokens.q_scale",
64
+ "shape": [
65
+ 32000,
66
+ 64
67
+ ],
68
+ "dtype": "float32",
69
+ "format": "f32-to-bf16",
70
+ "nbytes": 4096000,
71
+ "byteOffset": 4096000
72
+ },
73
+ {
74
+ "name": "model.layers.0.input_layernorm.weight",
75
+ "shape": [
76
+ 2048
77
+ ],
78
+ "dtype": "float32",
79
+ "format": "f32-to-bf16",
80
+ "nbytes": 4096,
81
+ "byteOffset": 8192000
82
+ },
83
+ {
84
+ "name": "model.layers.0.mlp.down_proj.q_weight",
85
+ "shape": [
86
+ 2048,
87
+ 704
88
+ ],
89
+ "dtype": "uint32",
90
+ "format": "f32-to-bf16",
91
+ "nbytes": 5767168,
92
+ "byteOffset": 8196096
93
+ },
94
+ {
95
+ "name": "model.layers.0.mlp.down_proj.q_scale",
96
+ "shape": [
97
+ 2048,
98
+ 176
99
+ ],
100
+ "dtype": "float32",
101
+ "format": "f32-to-bf16",
102
+ "nbytes": 720896,
103
+ "byteOffset": 13963264
104
+ },
105
+ {
106
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
107
+ "shape": [
108
+ 11264,
109
+ 256
110
+ ],
111
+ "dtype": "uint32",
112
+ "format": "f32-to-bf16",
113
+ "nbytes": 11534336,
114
+ "byteOffset": 14684160
115
+ },
116
+ {
117
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
118
+ "shape": [
119
+ 11264,
120
+ 64
121
+ ],
122
+ "dtype": "float32",
123
+ "format": "f32-to-bf16",
124
+ "nbytes": 1441792,
125
+ "byteOffset": 26218496
126
+ },
127
+ {
128
+ "name": "model.layers.0.post_attention_layernorm.weight",
129
+ "shape": [
130
+ 2048
131
+ ],
132
+ "dtype": "float32",
133
+ "format": "f32-to-bf16",
134
+ "nbytes": 4096,
135
+ "byteOffset": 27660288
136
+ },
137
+ {
138
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
139
+ "shape": [
140
+ 2560,
141
+ 256
142
+ ],
143
+ "dtype": "uint32",
144
+ "format": "f32-to-bf16",
145
+ "nbytes": 2621440,
146
+ "byteOffset": 27664384
147
+ },
148
+ {
149
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
150
+ "shape": [
151
+ 2560,
152
+ 64
153
+ ],
154
+ "dtype": "float32",
155
+ "format": "f32-to-bf16",
156
+ "nbytes": 327680,
157
+ "byteOffset": 30285824
158
+ },
159
+ {
160
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
161
+ "shape": [
162
+ 2048,
163
+ 256
164
+ ],
165
+ "dtype": "uint32",
166
+ "format": "f32-to-bf16",
167
+ "nbytes": 2097152,
168
+ "byteOffset": 30613504
169
+ },
170
+ {
171
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
172
+ "shape": [
173
+ 2048,
174
+ 64
175
+ ],
176
+ "dtype": "float32",
177
+ "format": "f32-to-bf16",
178
+ "nbytes": 262144,
179
+ "byteOffset": 32710656
180
+ },
181
+ {
182
+ "name": "model.layers.1.input_layernorm.weight",
183
+ "shape": [
184
+ 2048
185
+ ],
186
+ "dtype": "float32",
187
+ "format": "f32-to-bf16",
188
+ "nbytes": 4096,
189
+ "byteOffset": 32972800
190
+ }
191
+ ],
192
+ "md5sum": "b8052fae94b2f9484c7fed0f270034b6"
193
+ },
194
+ {
195
+ "dataPath": "params_shard_3.bin",
196
+ "format": "raw-shard",
197
+ "nbytes": 31268864,
198
+ "records": [
199
+ {
200
+ "name": "model.layers.1.mlp.down_proj.q_weight",
201
+ "shape": [
202
+ 2048,
203
+ 704
204
+ ],
205
+ "dtype": "uint32",
206
+ "format": "f32-to-bf16",
207
+ "nbytes": 5767168,
208
+ "byteOffset": 0
209
+ },
210
+ {
211
+ "name": "model.layers.1.mlp.down_proj.q_scale",
212
+ "shape": [
213
+ 2048,
214
+ 176
215
+ ],
216
+ "dtype": "float32",
217
+ "format": "f32-to-bf16",
218
+ "nbytes": 720896,
219
+ "byteOffset": 5767168
220
+ },
221
+ {
222
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
223
+ "shape": [
224
+ 11264,
225
+ 256
226
+ ],
227
+ "dtype": "uint32",
228
+ "format": "f32-to-bf16",
229
+ "nbytes": 11534336,
230
+ "byteOffset": 6488064
231
+ },
232
+ {
233
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
234
+ "shape": [
235
+ 11264,
236
+ 64
237
+ ],
238
+ "dtype": "float32",
239
+ "format": "f32-to-bf16",
240
+ "nbytes": 1441792,
241
+ "byteOffset": 18022400
242
+ },
243
+ {
244
+ "name": "model.layers.1.post_attention_layernorm.weight",
245
+ "shape": [
246
+ 2048
247
+ ],
248
+ "dtype": "float32",
249
+ "format": "f32-to-bf16",
250
+ "nbytes": 4096,
251
+ "byteOffset": 19464192
252
+ },
253
+ {
254
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
255
+ "shape": [
256
+ 2560,
257
+ 256
258
+ ],
259
+ "dtype": "uint32",
260
+ "format": "f32-to-bf16",
261
+ "nbytes": 2621440,
262
+ "byteOffset": 19468288
263
+ },
264
+ {
265
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
266
+ "shape": [
267
+ 2560,
268
+ 64
269
+ ],
270
+ "dtype": "float32",
271
+ "format": "f32-to-bf16",
272
+ "nbytes": 327680,
273
+ "byteOffset": 22089728
274
+ },
275
+ {
276
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
277
+ "shape": [
278
+ 2048,
279
+ 256
280
+ ],
281
+ "dtype": "uint32",
282
+ "format": "f32-to-bf16",
283
+ "nbytes": 2097152,
284
+ "byteOffset": 22417408
285
+ },
286
+ {
287
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
288
+ "shape": [
289
+ 2048,
290
+ 64
291
+ ],
292
+ "dtype": "float32",
293
+ "format": "f32-to-bf16",
294
+ "nbytes": 262144,
295
+ "byteOffset": 24514560
296
+ },
297
+ {
298
+ "name": "model.layers.10.input_layernorm.weight",
299
+ "shape": [
300
+ 2048
301
+ ],
302
+ "dtype": "float32",
303
+ "format": "f32-to-bf16",
304
+ "nbytes": 4096,
305
+ "byteOffset": 24776704
306
+ },
307
+ {
308
+ "name": "model.layers.10.mlp.down_proj.q_weight",
309
+ "shape": [
310
+ 2048,
311
+ 704
312
+ ],
313
+ "dtype": "uint32",
314
+ "format": "f32-to-bf16",
315
+ "nbytes": 5767168,
316
+ "byteOffset": 24780800
317
+ },
318
+ {
319
+ "name": "model.layers.10.mlp.down_proj.q_scale",
320
+ "shape": [
321
+ 2048,
322
+ 176
323
+ ],
324
+ "dtype": "float32",
325
+ "format": "f32-to-bf16",
326
+ "nbytes": 720896,
327
+ "byteOffset": 30547968
328
+ }
329
+ ],
330
+ "md5sum": "236ea086fe698d15895a284f828cd334"
331
+ },
332
+ {
333
+ "dataPath": "params_shard_4.bin",
334
+ "format": "raw-shard",
335
+ "nbytes": 24780800,
336
+ "records": [
337
+ {
338
+ "name": "model.layers.10.mlp.gate_up_proj.q_weight",
339
+ "shape": [
340
+ 11264,
341
+ 256
342
+ ],
343
+ "dtype": "uint32",
344
+ "format": "f32-to-bf16",
345
+ "nbytes": 11534336,
346
+ "byteOffset": 0
347
+ },
348
+ {
349
+ "name": "model.layers.10.mlp.gate_up_proj.q_scale",
350
+ "shape": [
351
+ 11264,
352
+ 64
353
+ ],
354
+ "dtype": "float32",
355
+ "format": "f32-to-bf16",
356
+ "nbytes": 1441792,
357
+ "byteOffset": 11534336
358
+ },
359
+ {
360
+ "name": "model.layers.10.post_attention_layernorm.weight",
361
+ "shape": [
362
+ 2048
363
+ ],
364
+ "dtype": "float32",
365
+ "format": "f32-to-bf16",
366
+ "nbytes": 4096,
367
+ "byteOffset": 12976128
368
+ },
369
+ {
370
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
371
+ "shape": [
372
+ 2560,
373
+ 256
374
+ ],
375
+ "dtype": "uint32",
376
+ "format": "f32-to-bf16",
377
+ "nbytes": 2621440,
378
+ "byteOffset": 12980224
379
+ },
380
+ {
381
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
382
+ "shape": [
383
+ 2560,
384
+ 64
385
+ ],
386
+ "dtype": "float32",
387
+ "format": "f32-to-bf16",
388
+ "nbytes": 327680,
389
+ "byteOffset": 15601664
390
+ },
391
+ {
392
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
393
+ "shape": [
394
+ 2048,
395
+ 256
396
+ ],
397
+ "dtype": "uint32",
398
+ "format": "f32-to-bf16",
399
+ "nbytes": 2097152,
400
+ "byteOffset": 15929344
401
+ },
402
+ {
403
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
404
+ "shape": [
405
+ 2048,
406
+ 64
407
+ ],
408
+ "dtype": "float32",
409
+ "format": "f32-to-bf16",
410
+ "nbytes": 262144,
411
+ "byteOffset": 18026496
412
+ },
413
+ {
414
+ "name": "model.layers.11.input_layernorm.weight",
415
+ "shape": [
416
+ 2048
417
+ ],
418
+ "dtype": "float32",
419
+ "format": "f32-to-bf16",
420
+ "nbytes": 4096,
421
+ "byteOffset": 18288640
422
+ },
423
+ {
424
+ "name": "model.layers.11.mlp.down_proj.q_weight",
425
+ "shape": [
426
+ 2048,
427
+ 704
428
+ ],
429
+ "dtype": "uint32",
430
+ "format": "f32-to-bf16",
431
+ "nbytes": 5767168,
432
+ "byteOffset": 18292736
433
+ },
434
+ {
435
+ "name": "model.layers.11.mlp.down_proj.q_scale",
436
+ "shape": [
437
+ 2048,
438
+ 176
439
+ ],
440
+ "dtype": "float32",
441
+ "format": "f32-to-bf16",
442
+ "nbytes": 720896,
443
+ "byteOffset": 24059904
444
+ }
445
+ ],
446
+ "md5sum": "9f801a8ec12c49b630b273561a4d9554"
447
+ },
448
+ {
449
+ "dataPath": "params_shard_5.bin",
450
+ "format": "raw-shard",
451
+ "nbytes": 24780800,
452
+ "records": [
453
+ {
454
+ "name": "model.layers.11.mlp.gate_up_proj.q_weight",
455
+ "shape": [
456
+ 11264,
457
+ 256
458
+ ],
459
+ "dtype": "uint32",
460
+ "format": "f32-to-bf16",
461
+ "nbytes": 11534336,
462
+ "byteOffset": 0
463
+ },
464
+ {
465
+ "name": "model.layers.11.mlp.gate_up_proj.q_scale",
466
+ "shape": [
467
+ 11264,
468
+ 64
469
+ ],
470
+ "dtype": "float32",
471
+ "format": "f32-to-bf16",
472
+ "nbytes": 1441792,
473
+ "byteOffset": 11534336
474
+ },
475
+ {
476
+ "name": "model.layers.11.post_attention_layernorm.weight",
477
+ "shape": [
478
+ 2048
479
+ ],
480
+ "dtype": "float32",
481
+ "format": "f32-to-bf16",
482
+ "nbytes": 4096,
483
+ "byteOffset": 12976128
484
+ },
485
+ {
486
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
487
+ "shape": [
488
+ 2560,
489
+ 256
490
+ ],
491
+ "dtype": "uint32",
492
+ "format": "f32-to-bf16",
493
+ "nbytes": 2621440,
494
+ "byteOffset": 12980224
495
+ },
496
+ {
497
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
498
+ "shape": [
499
+ 2560,
500
+ 64
501
+ ],
502
+ "dtype": "float32",
503
+ "format": "f32-to-bf16",
504
+ "nbytes": 327680,
505
+ "byteOffset": 15601664
506
+ },
507
+ {
508
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
509
+ "shape": [
510
+ 2048,
511
+ 256
512
+ ],
513
+ "dtype": "uint32",
514
+ "format": "f32-to-bf16",
515
+ "nbytes": 2097152,
516
+ "byteOffset": 15929344
517
+ },
518
+ {
519
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
520
+ "shape": [
521
+ 2048,
522
+ 64
523
+ ],
524
+ "dtype": "float32",
525
+ "format": "f32-to-bf16",
526
+ "nbytes": 262144,
527
+ "byteOffset": 18026496
528
+ },
529
+ {
530
+ "name": "model.layers.12.input_layernorm.weight",
531
+ "shape": [
532
+ 2048
533
+ ],
534
+ "dtype": "float32",
535
+ "format": "f32-to-bf16",
536
+ "nbytes": 4096,
537
+ "byteOffset": 18288640
538
+ },
539
+ {
540
+ "name": "model.layers.12.mlp.down_proj.q_weight",
541
+ "shape": [
542
+ 2048,
543
+ 704
544
+ ],
545
+ "dtype": "uint32",
546
+ "format": "f32-to-bf16",
547
+ "nbytes": 5767168,
548
+ "byteOffset": 18292736
549
+ },
550
+ {
551
+ "name": "model.layers.12.mlp.down_proj.q_scale",
552
+ "shape": [
553
+ 2048,
554
+ 176
555
+ ],
556
+ "dtype": "float32",
557
+ "format": "f32-to-bf16",
558
+ "nbytes": 720896,
559
+ "byteOffset": 24059904
560
+ }
561
+ ],
562
+ "md5sum": "4a90502242460bc5d815ab87dcc2c3fc"
563
+ },
564
+ {
565
+ "dataPath": "params_shard_6.bin",
566
+ "format": "raw-shard",
567
+ "nbytes": 24780800,
568
+ "records": [
569
+ {
570
+ "name": "model.layers.12.mlp.gate_up_proj.q_weight",
571
+ "shape": [
572
+ 11264,
573
+ 256
574
+ ],
575
+ "dtype": "uint32",
576
+ "format": "f32-to-bf16",
577
+ "nbytes": 11534336,
578
+ "byteOffset": 0
579
+ },
580
+ {
581
+ "name": "model.layers.12.mlp.gate_up_proj.q_scale",
582
+ "shape": [
583
+ 11264,
584
+ 64
585
+ ],
586
+ "dtype": "float32",
587
+ "format": "f32-to-bf16",
588
+ "nbytes": 1441792,
589
+ "byteOffset": 11534336
590
+ },
591
+ {
592
+ "name": "model.layers.12.post_attention_layernorm.weight",
593
+ "shape": [
594
+ 2048
595
+ ],
596
+ "dtype": "float32",
597
+ "format": "f32-to-bf16",
598
+ "nbytes": 4096,
599
+ "byteOffset": 12976128
600
+ },
601
+ {
602
+ "name": "model.layers.12.self_attn.qkv_proj.q_weight",
603
+ "shape": [
604
+ 2560,
605
+ 256
606
+ ],
607
+ "dtype": "uint32",
608
+ "format": "f32-to-bf16",
609
+ "nbytes": 2621440,
610
+ "byteOffset": 12980224
611
+ },
612
+ {
613
+ "name": "model.layers.12.self_attn.qkv_proj.q_scale",
614
+ "shape": [
615
+ 2560,
616
+ 64
617
+ ],
618
+ "dtype": "float32",
619
+ "format": "f32-to-bf16",
620
+ "nbytes": 327680,
621
+ "byteOffset": 15601664
622
+ },
623
+ {
624
+ "name": "model.layers.12.self_attn.o_proj.q_weight",
625
+ "shape": [
626
+ 2048,
627
+ 256
628
+ ],
629
+ "dtype": "uint32",
630
+ "format": "f32-to-bf16",
631
+ "nbytes": 2097152,
632
+ "byteOffset": 15929344
633
+ },
634
+ {
635
+ "name": "model.layers.12.self_attn.o_proj.q_scale",
636
+ "shape": [
637
+ 2048,
638
+ 64
639
+ ],
640
+ "dtype": "float32",
641
+ "format": "f32-to-bf16",
642
+ "nbytes": 262144,
643
+ "byteOffset": 18026496
644
+ },
645
+ {
646
+ "name": "model.layers.13.input_layernorm.weight",
647
+ "shape": [
648
+ 2048
649
+ ],
650
+ "dtype": "float32",
651
+ "format": "f32-to-bf16",
652
+ "nbytes": 4096,
653
+ "byteOffset": 18288640
654
+ },
655
+ {
656
+ "name": "model.layers.13.mlp.down_proj.q_weight",
657
+ "shape": [
658
+ 2048,
659
+ 704
660
+ ],
661
+ "dtype": "uint32",
662
+ "format": "f32-to-bf16",
663
+ "nbytes": 5767168,
664
+ "byteOffset": 18292736
665
+ },
666
+ {
667
+ "name": "model.layers.13.mlp.down_proj.q_scale",
668
+ "shape": [
669
+ 2048,
670
+ 176
671
+ ],
672
+ "dtype": "float32",
673
+ "format": "f32-to-bf16",
674
+ "nbytes": 720896,
675
+ "byteOffset": 24059904
676
+ }
677
+ ],
678
+ "md5sum": "4bc1e064d362ffbf9bae57c15e735e8d"
679
+ },
680
+ {
681
+ "dataPath": "params_shard_7.bin",
682
+ "format": "raw-shard",
683
+ "nbytes": 24780800,
684
+ "records": [
685
+ {
686
+ "name": "model.layers.13.mlp.gate_up_proj.q_weight",
687
+ "shape": [
688
+ 11264,
689
+ 256
690
+ ],
691
+ "dtype": "uint32",
692
+ "format": "f32-to-bf16",
693
+ "nbytes": 11534336,
694
+ "byteOffset": 0
695
+ },
696
+ {
697
+ "name": "model.layers.13.mlp.gate_up_proj.q_scale",
698
+ "shape": [
699
+ 11264,
700
+ 64
701
+ ],
702
+ "dtype": "float32",
703
+ "format": "f32-to-bf16",
704
+ "nbytes": 1441792,
705
+ "byteOffset": 11534336
706
+ },
707
+ {
708
+ "name": "model.layers.13.post_attention_layernorm.weight",
709
+ "shape": [
710
+ 2048
711
+ ],
712
+ "dtype": "float32",
713
+ "format": "f32-to-bf16",
714
+ "nbytes": 4096,
715
+ "byteOffset": 12976128
716
+ },
717
+ {
718
+ "name": "model.layers.13.self_attn.qkv_proj.q_weight",
719
+ "shape": [
720
+ 2560,
721
+ 256
722
+ ],
723
+ "dtype": "uint32",
724
+ "format": "f32-to-bf16",
725
+ "nbytes": 2621440,
726
+ "byteOffset": 12980224
727
+ },
728
+ {
729
+ "name": "model.layers.13.self_attn.qkv_proj.q_scale",
730
+ "shape": [
731
+ 2560,
732
+ 64
733
+ ],
734
+ "dtype": "float32",
735
+ "format": "f32-to-bf16",
736
+ "nbytes": 327680,
737
+ "byteOffset": 15601664
738
+ },
739
+ {
740
+ "name": "model.layers.13.self_attn.o_proj.q_weight",
741
+ "shape": [
742
+ 2048,
743
+ 256
744
+ ],
745
+ "dtype": "uint32",
746
+ "format": "f32-to-bf16",
747
+ "nbytes": 2097152,
748
+ "byteOffset": 15929344
749
+ },
750
+ {
751
+ "name": "model.layers.13.self_attn.o_proj.q_scale",
752
+ "shape": [
753
+ 2048,
754
+ 64
755
+ ],
756
+ "dtype": "float32",
757
+ "format": "f32-to-bf16",
758
+ "nbytes": 262144,
759
+ "byteOffset": 18026496
760
+ },
761
+ {
762
+ "name": "model.layers.14.input_layernorm.weight",
763
+ "shape": [
764
+ 2048
765
+ ],
766
+ "dtype": "float32",
767
+ "format": "f32-to-bf16",
768
+ "nbytes": 4096,
769
+ "byteOffset": 18288640
770
+ },
771
+ {
772
+ "name": "model.layers.14.mlp.down_proj.q_weight",
773
+ "shape": [
774
+ 2048,
775
+ 704
776
+ ],
777
+ "dtype": "uint32",
778
+ "format": "f32-to-bf16",
779
+ "nbytes": 5767168,
780
+ "byteOffset": 18292736
781
+ },
782
+ {
783
+ "name": "model.layers.14.mlp.down_proj.q_scale",
784
+ "shape": [
785
+ 2048,
786
+ 176
787
+ ],
788
+ "dtype": "float32",
789
+ "format": "f32-to-bf16",
790
+ "nbytes": 720896,
791
+ "byteOffset": 24059904
792
+ }
793
+ ],
794
+ "md5sum": "85a8f0c3b9857df7a6fa1b9cbbb5d927"
795
+ },
796
+ {
797
+ "dataPath": "params_shard_8.bin",
798
+ "format": "raw-shard",
799
+ "nbytes": 24780800,
800
+ "records": [
801
+ {
802
+ "name": "model.layers.14.mlp.gate_up_proj.q_weight",
803
+ "shape": [
804
+ 11264,
805
+ 256
806
+ ],
807
+ "dtype": "uint32",
808
+ "format": "f32-to-bf16",
809
+ "nbytes": 11534336,
810
+ "byteOffset": 0
811
+ },
812
+ {
813
+ "name": "model.layers.14.mlp.gate_up_proj.q_scale",
814
+ "shape": [
815
+ 11264,
816
+ 64
817
+ ],
818
+ "dtype": "float32",
819
+ "format": "f32-to-bf16",
820
+ "nbytes": 1441792,
821
+ "byteOffset": 11534336
822
+ },
823
+ {
824
+ "name": "model.layers.14.post_attention_layernorm.weight",
825
+ "shape": [
826
+ 2048
827
+ ],
828
+ "dtype": "float32",
829
+ "format": "f32-to-bf16",
830
+ "nbytes": 4096,
831
+ "byteOffset": 12976128
832
+ },
833
+ {
834
+ "name": "model.layers.14.self_attn.qkv_proj.q_weight",
835
+ "shape": [
836
+ 2560,
837
+ 256
838
+ ],
839
+ "dtype": "uint32",
840
+ "format": "f32-to-bf16",
841
+ "nbytes": 2621440,
842
+ "byteOffset": 12980224
843
+ },
844
+ {
845
+ "name": "model.layers.14.self_attn.qkv_proj.q_scale",
846
+ "shape": [
847
+ 2560,
848
+ 64
849
+ ],
850
+ "dtype": "float32",
851
+ "format": "f32-to-bf16",
852
+ "nbytes": 327680,
853
+ "byteOffset": 15601664
854
+ },
855
+ {
856
+ "name": "model.layers.14.self_attn.o_proj.q_weight",
857
+ "shape": [
858
+ 2048,
859
+ 256
860
+ ],
861
+ "dtype": "uint32",
862
+ "format": "f32-to-bf16",
863
+ "nbytes": 2097152,
864
+ "byteOffset": 15929344
865
+ },
866
+ {
867
+ "name": "model.layers.14.self_attn.o_proj.q_scale",
868
+ "shape": [
869
+ 2048,
870
+ 64
871
+ ],
872
+ "dtype": "float32",
873
+ "format": "f32-to-bf16",
874
+ "nbytes": 262144,
875
+ "byteOffset": 18026496
876
+ },
877
+ {
878
+ "name": "model.layers.15.input_layernorm.weight",
879
+ "shape": [
880
+ 2048
881
+ ],
882
+ "dtype": "float32",
883
+ "format": "f32-to-bf16",
884
+ "nbytes": 4096,
885
+ "byteOffset": 18288640
886
+ },
887
+ {
888
+ "name": "model.layers.15.mlp.down_proj.q_weight",
889
+ "shape": [
890
+ 2048,
891
+ 704
892
+ ],
893
+ "dtype": "uint32",
894
+ "format": "f32-to-bf16",
895
+ "nbytes": 5767168,
896
+ "byteOffset": 18292736
897
+ },
898
+ {
899
+ "name": "model.layers.15.mlp.down_proj.q_scale",
900
+ "shape": [
901
+ 2048,
902
+ 176
903
+ ],
904
+ "dtype": "float32",
905
+ "format": "f32-to-bf16",
906
+ "nbytes": 720896,
907
+ "byteOffset": 24059904
908
+ }
909
+ ],
910
+ "md5sum": "18e9c491a88f7adc43ed17d6f940f080"
911
+ },
912
+ {
913
+ "dataPath": "params_shard_9.bin",
914
+ "format": "raw-shard",
915
+ "nbytes": 24780800,
916
+ "records": [
917
+ {
918
+ "name": "model.layers.15.mlp.gate_up_proj.q_weight",
919
+ "shape": [
920
+ 11264,
921
+ 256
922
+ ],
923
+ "dtype": "uint32",
924
+ "format": "f32-to-bf16",
925
+ "nbytes": 11534336,
926
+ "byteOffset": 0
927
+ },
928
+ {
929
+ "name": "model.layers.15.mlp.gate_up_proj.q_scale",
930
+ "shape": [
931
+ 11264,
932
+ 64
933
+ ],
934
+ "dtype": "float32",
935
+ "format": "f32-to-bf16",
936
+ "nbytes": 1441792,
937
+ "byteOffset": 11534336
938
+ },
939
+ {
940
+ "name": "model.layers.15.post_attention_layernorm.weight",
941
+ "shape": [
942
+ 2048
943
+ ],
944
+ "dtype": "float32",
945
+ "format": "f32-to-bf16",
946
+ "nbytes": 4096,
947
+ "byteOffset": 12976128
948
+ },
949
+ {
950
+ "name": "model.layers.15.self_attn.qkv_proj.q_weight",
951
+ "shape": [
952
+ 2560,
953
+ 256
954
+ ],
955
+ "dtype": "uint32",
956
+ "format": "f32-to-bf16",
957
+ "nbytes": 2621440,
958
+ "byteOffset": 12980224
959
+ },
960
+ {
961
+ "name": "model.layers.15.self_attn.qkv_proj.q_scale",
962
+ "shape": [
963
+ 2560,
964
+ 64
965
+ ],
966
+ "dtype": "float32",
967
+ "format": "f32-to-bf16",
968
+ "nbytes": 327680,
969
+ "byteOffset": 15601664
970
+ },
971
+ {
972
+ "name": "model.layers.15.self_attn.o_proj.q_weight",
973
+ "shape": [
974
+ 2048,
975
+ 256
976
+ ],
977
+ "dtype": "uint32",
978
+ "format": "f32-to-bf16",
979
+ "nbytes": 2097152,
980
+ "byteOffset": 15929344
981
+ },
982
+ {
983
+ "name": "model.layers.15.self_attn.o_proj.q_scale",
984
+ "shape": [
985
+ 2048,
986
+ 64
987
+ ],
988
+ "dtype": "float32",
989
+ "format": "f32-to-bf16",
990
+ "nbytes": 262144,
991
+ "byteOffset": 18026496
992
+ },
993
+ {
994
+ "name": "model.layers.16.input_layernorm.weight",
995
+ "shape": [
996
+ 2048
997
+ ],
998
+ "dtype": "float32",
999
+ "format": "f32-to-bf16",
1000
+ "nbytes": 4096,
1001
+ "byteOffset": 18288640
1002
+ },
1003
+ {
1004
+ "name": "model.layers.16.mlp.down_proj.q_weight",
1005
+ "shape": [
1006
+ 2048,
1007
+ 704
1008
+ ],
1009
+ "dtype": "uint32",
1010
+ "format": "f32-to-bf16",
1011
+ "nbytes": 5767168,
1012
+ "byteOffset": 18292736
1013
+ },
1014
+ {
1015
+ "name": "model.layers.16.mlp.down_proj.q_scale",
1016
+ "shape": [
1017
+ 2048,
1018
+ 176
1019
+ ],
1020
+ "dtype": "float32",
1021
+ "format": "f32-to-bf16",
1022
+ "nbytes": 720896,
1023
+ "byteOffset": 24059904
1024
+ }
1025
+ ],
1026
+ "md5sum": "ebe88a618b7cb4980864878efb2bdbb7"
1027
+ },
1028
+ {
1029
+ "dataPath": "params_shard_10.bin",
1030
+ "format": "raw-shard",
1031
+ "nbytes": 24780800,
1032
+ "records": [
1033
+ {
1034
+ "name": "model.layers.16.mlp.gate_up_proj.q_weight",
1035
+ "shape": [
1036
+ 11264,
1037
+ 256
1038
+ ],
1039
+ "dtype": "uint32",
1040
+ "format": "f32-to-bf16",
1041
+ "nbytes": 11534336,
1042
+ "byteOffset": 0
1043
+ },
1044
+ {
1045
+ "name": "model.layers.16.mlp.gate_up_proj.q_scale",
1046
+ "shape": [
1047
+ 11264,
1048
+ 64
1049
+ ],
1050
+ "dtype": "float32",
1051
+ "format": "f32-to-bf16",
1052
+ "nbytes": 1441792,
1053
+ "byteOffset": 11534336
1054
+ },
1055
+ {
1056
+ "name": "model.layers.16.post_attention_layernorm.weight",
1057
+ "shape": [
1058
+ 2048
1059
+ ],
1060
+ "dtype": "float32",
1061
+ "format": "f32-to-bf16",
1062
+ "nbytes": 4096,
1063
+ "byteOffset": 12976128
1064
+ },
1065
+ {
1066
+ "name": "model.layers.16.self_attn.qkv_proj.q_weight",
1067
+ "shape": [
1068
+ 2560,
1069
+ 256
1070
+ ],
1071
+ "dtype": "uint32",
1072
+ "format": "f32-to-bf16",
1073
+ "nbytes": 2621440,
1074
+ "byteOffset": 12980224
1075
+ },
1076
+ {
1077
+ "name": "model.layers.16.self_attn.qkv_proj.q_scale",
1078
+ "shape": [
1079
+ 2560,
1080
+ 64
1081
+ ],
1082
+ "dtype": "float32",
1083
+ "format": "f32-to-bf16",
1084
+ "nbytes": 327680,
1085
+ "byteOffset": 15601664
1086
+ },
1087
+ {
1088
+ "name": "model.layers.16.self_attn.o_proj.q_weight",
1089
+ "shape": [
1090
+ 2048,
1091
+ 256
1092
+ ],
1093
+ "dtype": "uint32",
1094
+ "format": "f32-to-bf16",
1095
+ "nbytes": 2097152,
1096
+ "byteOffset": 15929344
1097
+ },
1098
+ {
1099
+ "name": "model.layers.16.self_attn.o_proj.q_scale",
1100
+ "shape": [
1101
+ 2048,
1102
+ 64
1103
+ ],
1104
+ "dtype": "float32",
1105
+ "format": "f32-to-bf16",
1106
+ "nbytes": 262144,
1107
+ "byteOffset": 18026496
1108
+ },
1109
+ {
1110
+ "name": "model.layers.17.input_layernorm.weight",
1111
+ "shape": [
1112
+ 2048
1113
+ ],
1114
+ "dtype": "float32",
1115
+ "format": "f32-to-bf16",
1116
+ "nbytes": 4096,
1117
+ "byteOffset": 18288640
1118
+ },
1119
+ {
1120
+ "name": "model.layers.17.mlp.down_proj.q_weight",
1121
+ "shape": [
1122
+ 2048,
1123
+ 704
1124
+ ],
1125
+ "dtype": "uint32",
1126
+ "format": "f32-to-bf16",
1127
+ "nbytes": 5767168,
1128
+ "byteOffset": 18292736
1129
+ },
1130
+ {
1131
+ "name": "model.layers.17.mlp.down_proj.q_scale",
1132
+ "shape": [
1133
+ 2048,
1134
+ 176
1135
+ ],
1136
+ "dtype": "float32",
1137
+ "format": "f32-to-bf16",
1138
+ "nbytes": 720896,
1139
+ "byteOffset": 24059904
1140
+ }
1141
+ ],
1142
+ "md5sum": "2a298c0af58f1e1a208b199e40b49972"
1143
+ },
1144
+ {
1145
+ "dataPath": "params_shard_11.bin",
1146
+ "format": "raw-shard",
1147
+ "nbytes": 24780800,
1148
+ "records": [
1149
+ {
1150
+ "name": "model.layers.17.mlp.gate_up_proj.q_weight",
1151
+ "shape": [
1152
+ 11264,
1153
+ 256
1154
+ ],
1155
+ "dtype": "uint32",
1156
+ "format": "f32-to-bf16",
1157
+ "nbytes": 11534336,
1158
+ "byteOffset": 0
1159
+ },
1160
+ {
1161
+ "name": "model.layers.17.mlp.gate_up_proj.q_scale",
1162
+ "shape": [
1163
+ 11264,
1164
+ 64
1165
+ ],
1166
+ "dtype": "float32",
1167
+ "format": "f32-to-bf16",
1168
+ "nbytes": 1441792,
1169
+ "byteOffset": 11534336
1170
+ },
1171
+ {
1172
+ "name": "model.layers.17.post_attention_layernorm.weight",
1173
+ "shape": [
1174
+ 2048
1175
+ ],
1176
+ "dtype": "float32",
1177
+ "format": "f32-to-bf16",
1178
+ "nbytes": 4096,
1179
+ "byteOffset": 12976128
1180
+ },
1181
+ {
1182
+ "name": "model.layers.17.self_attn.qkv_proj.q_weight",
1183
+ "shape": [
1184
+ 2560,
1185
+ 256
1186
+ ],
1187
+ "dtype": "uint32",
1188
+ "format": "f32-to-bf16",
1189
+ "nbytes": 2621440,
1190
+ "byteOffset": 12980224
1191
+ },
1192
+ {
1193
+ "name": "model.layers.17.self_attn.qkv_proj.q_scale",
1194
+ "shape": [
1195
+ 2560,
1196
+ 64
1197
+ ],
1198
+ "dtype": "float32",
1199
+ "format": "f32-to-bf16",
1200
+ "nbytes": 327680,
1201
+ "byteOffset": 15601664
1202
+ },
1203
+ {
1204
+ "name": "model.layers.17.self_attn.o_proj.q_weight",
1205
+ "shape": [
1206
+ 2048,
1207
+ 256
1208
+ ],
1209
+ "dtype": "uint32",
1210
+ "format": "f32-to-bf16",
1211
+ "nbytes": 2097152,
1212
+ "byteOffset": 15929344
1213
+ },
1214
+ {
1215
+ "name": "model.layers.17.self_attn.o_proj.q_scale",
1216
+ "shape": [
1217
+ 2048,
1218
+ 64
1219
+ ],
1220
+ "dtype": "float32",
1221
+ "format": "f32-to-bf16",
1222
+ "nbytes": 262144,
1223
+ "byteOffset": 18026496
1224
+ },
1225
+ {
1226
+ "name": "model.layers.18.input_layernorm.weight",
1227
+ "shape": [
1228
+ 2048
1229
+ ],
1230
+ "dtype": "float32",
1231
+ "format": "f32-to-bf16",
1232
+ "nbytes": 4096,
1233
+ "byteOffset": 18288640
1234
+ },
1235
+ {
1236
+ "name": "model.layers.18.mlp.down_proj.q_weight",
1237
+ "shape": [
1238
+ 2048,
1239
+ 704
1240
+ ],
1241
+ "dtype": "uint32",
1242
+ "format": "f32-to-bf16",
1243
+ "nbytes": 5767168,
1244
+ "byteOffset": 18292736
1245
+ },
1246
+ {
1247
+ "name": "model.layers.18.mlp.down_proj.q_scale",
1248
+ "shape": [
1249
+ 2048,
1250
+ 176
1251
+ ],
1252
+ "dtype": "float32",
1253
+ "format": "f32-to-bf16",
1254
+ "nbytes": 720896,
1255
+ "byteOffset": 24059904
1256
+ }
1257
+ ],
1258
+ "md5sum": "8a6ef364911a1049b2c0531be0fc15b6"
1259
+ },
1260
+ {
1261
+ "dataPath": "params_shard_12.bin",
1262
+ "format": "raw-shard",
1263
+ "nbytes": 24780800,
1264
+ "records": [
1265
+ {
1266
+ "name": "model.layers.18.mlp.gate_up_proj.q_weight",
1267
+ "shape": [
1268
+ 11264,
1269
+ 256
1270
+ ],
1271
+ "dtype": "uint32",
1272
+ "format": "f32-to-bf16",
1273
+ "nbytes": 11534336,
1274
+ "byteOffset": 0
1275
+ },
1276
+ {
1277
+ "name": "model.layers.18.mlp.gate_up_proj.q_scale",
1278
+ "shape": [
1279
+ 11264,
1280
+ 64
1281
+ ],
1282
+ "dtype": "float32",
1283
+ "format": "f32-to-bf16",
1284
+ "nbytes": 1441792,
1285
+ "byteOffset": 11534336
1286
+ },
1287
+ {
1288
+ "name": "model.layers.18.post_attention_layernorm.weight",
1289
+ "shape": [
1290
+ 2048
1291
+ ],
1292
+ "dtype": "float32",
1293
+ "format": "f32-to-bf16",
1294
+ "nbytes": 4096,
1295
+ "byteOffset": 12976128
1296
+ },
1297
+ {
1298
+ "name": "model.layers.18.self_attn.qkv_proj.q_weight",
1299
+ "shape": [
1300
+ 2560,
1301
+ 256
1302
+ ],
1303
+ "dtype": "uint32",
1304
+ "format": "f32-to-bf16",
1305
+ "nbytes": 2621440,
1306
+ "byteOffset": 12980224
1307
+ },
1308
+ {
1309
+ "name": "model.layers.18.self_attn.qkv_proj.q_scale",
1310
+ "shape": [
1311
+ 2560,
1312
+ 64
1313
+ ],
1314
+ "dtype": "float32",
1315
+ "format": "f32-to-bf16",
1316
+ "nbytes": 327680,
1317
+ "byteOffset": 15601664
1318
+ },
1319
+ {
1320
+ "name": "model.layers.18.self_attn.o_proj.q_weight",
1321
+ "shape": [
1322
+ 2048,
1323
+ 256
1324
+ ],
1325
+ "dtype": "uint32",
1326
+ "format": "f32-to-bf16",
1327
+ "nbytes": 2097152,
1328
+ "byteOffset": 15929344
1329
+ },
1330
+ {
1331
+ "name": "model.layers.18.self_attn.o_proj.q_scale",
1332
+ "shape": [
1333
+ 2048,
1334
+ 64
1335
+ ],
1336
+ "dtype": "float32",
1337
+ "format": "f32-to-bf16",
1338
+ "nbytes": 262144,
1339
+ "byteOffset": 18026496
1340
+ },
1341
+ {
1342
+ "name": "model.layers.19.input_layernorm.weight",
1343
+ "shape": [
1344
+ 2048
1345
+ ],
1346
+ "dtype": "float32",
1347
+ "format": "f32-to-bf16",
1348
+ "nbytes": 4096,
1349
+ "byteOffset": 18288640
1350
+ },
1351
+ {
1352
+ "name": "model.layers.19.mlp.down_proj.q_weight",
1353
+ "shape": [
1354
+ 2048,
1355
+ 704
1356
+ ],
1357
+ "dtype": "uint32",
1358
+ "format": "f32-to-bf16",
1359
+ "nbytes": 5767168,
1360
+ "byteOffset": 18292736
1361
+ },
1362
+ {
1363
+ "name": "model.layers.19.mlp.down_proj.q_scale",
1364
+ "shape": [
1365
+ 2048,
1366
+ 176
1367
+ ],
1368
+ "dtype": "float32",
1369
+ "format": "f32-to-bf16",
1370
+ "nbytes": 720896,
1371
+ "byteOffset": 24059904
1372
+ }
1373
+ ],
1374
+ "md5sum": "13ffdc8591e96c1c5f27090d70144bd0"
1375
+ },
1376
+ {
1377
+ "dataPath": "params_shard_13.bin",
1378
+ "format": "raw-shard",
1379
+ "nbytes": 24780800,
1380
+ "records": [
1381
+ {
1382
+ "name": "model.layers.19.mlp.gate_up_proj.q_weight",
1383
+ "shape": [
1384
+ 11264,
1385
+ 256
1386
+ ],
1387
+ "dtype": "uint32",
1388
+ "format": "f32-to-bf16",
1389
+ "nbytes": 11534336,
1390
+ "byteOffset": 0
1391
+ },
1392
+ {
1393
+ "name": "model.layers.19.mlp.gate_up_proj.q_scale",
1394
+ "shape": [
1395
+ 11264,
1396
+ 64
1397
+ ],
1398
+ "dtype": "float32",
1399
+ "format": "f32-to-bf16",
1400
+ "nbytes": 1441792,
1401
+ "byteOffset": 11534336
1402
+ },
1403
+ {
1404
+ "name": "model.layers.19.post_attention_layernorm.weight",
1405
+ "shape": [
1406
+ 2048
1407
+ ],
1408
+ "dtype": "float32",
1409
+ "format": "f32-to-bf16",
1410
+ "nbytes": 4096,
1411
+ "byteOffset": 12976128
1412
+ },
1413
+ {
1414
+ "name": "model.layers.19.self_attn.qkv_proj.q_weight",
1415
+ "shape": [
1416
+ 2560,
1417
+ 256
1418
+ ],
1419
+ "dtype": "uint32",
1420
+ "format": "f32-to-bf16",
1421
+ "nbytes": 2621440,
1422
+ "byteOffset": 12980224
1423
+ },
1424
+ {
1425
+ "name": "model.layers.19.self_attn.qkv_proj.q_scale",
1426
+ "shape": [
1427
+ 2560,
1428
+ 64
1429
+ ],
1430
+ "dtype": "float32",
1431
+ "format": "f32-to-bf16",
1432
+ "nbytes": 327680,
1433
+ "byteOffset": 15601664
1434
+ },
1435
+ {
1436
+ "name": "model.layers.19.self_attn.o_proj.q_weight",
1437
+ "shape": [
1438
+ 2048,
1439
+ 256
1440
+ ],
1441
+ "dtype": "uint32",
1442
+ "format": "f32-to-bf16",
1443
+ "nbytes": 2097152,
1444
+ "byteOffset": 15929344
1445
+ },
1446
+ {
1447
+ "name": "model.layers.19.self_attn.o_proj.q_scale",
1448
+ "shape": [
1449
+ 2048,
1450
+ 64
1451
+ ],
1452
+ "dtype": "float32",
1453
+ "format": "f32-to-bf16",
1454
+ "nbytes": 262144,
1455
+ "byteOffset": 18026496
1456
+ },
1457
+ {
1458
+ "name": "model.layers.2.input_layernorm.weight",
1459
+ "shape": [
1460
+ 2048
1461
+ ],
1462
+ "dtype": "float32",
1463
+ "format": "f32-to-bf16",
1464
+ "nbytes": 4096,
1465
+ "byteOffset": 18288640
1466
+ },
1467
+ {
1468
+ "name": "model.layers.2.mlp.down_proj.q_weight",
1469
+ "shape": [
1470
+ 2048,
1471
+ 704
1472
+ ],
1473
+ "dtype": "uint32",
1474
+ "format": "f32-to-bf16",
1475
+ "nbytes": 5767168,
1476
+ "byteOffset": 18292736
1477
+ },
1478
+ {
1479
+ "name": "model.layers.2.mlp.down_proj.q_scale",
1480
+ "shape": [
1481
+ 2048,
1482
+ 176
1483
+ ],
1484
+ "dtype": "float32",
1485
+ "format": "f32-to-bf16",
1486
+ "nbytes": 720896,
1487
+ "byteOffset": 24059904
1488
+ }
1489
+ ],
1490
+ "md5sum": "35244475eb557462112a7fa2d3db4dc9"
1491
+ },
1492
+ {
1493
+ "dataPath": "params_shard_14.bin",
1494
+ "format": "raw-shard",
1495
+ "nbytes": 24780800,
1496
+ "records": [
1497
+ {
1498
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
1499
+ "shape": [
1500
+ 11264,
1501
+ 256
1502
+ ],
1503
+ "dtype": "uint32",
1504
+ "format": "f32-to-bf16",
1505
+ "nbytes": 11534336,
1506
+ "byteOffset": 0
1507
+ },
1508
+ {
1509
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
1510
+ "shape": [
1511
+ 11264,
1512
+ 64
1513
+ ],
1514
+ "dtype": "float32",
1515
+ "format": "f32-to-bf16",
1516
+ "nbytes": 1441792,
1517
+ "byteOffset": 11534336
1518
+ },
1519
+ {
1520
+ "name": "model.layers.2.post_attention_layernorm.weight",
1521
+ "shape": [
1522
+ 2048
1523
+ ],
1524
+ "dtype": "float32",
1525
+ "format": "f32-to-bf16",
1526
+ "nbytes": 4096,
1527
+ "byteOffset": 12976128
1528
+ },
1529
+ {
1530
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
1531
+ "shape": [
1532
+ 2560,
1533
+ 256
1534
+ ],
1535
+ "dtype": "uint32",
1536
+ "format": "f32-to-bf16",
1537
+ "nbytes": 2621440,
1538
+ "byteOffset": 12980224
1539
+ },
1540
+ {
1541
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
1542
+ "shape": [
1543
+ 2560,
1544
+ 64
1545
+ ],
1546
+ "dtype": "float32",
1547
+ "format": "f32-to-bf16",
1548
+ "nbytes": 327680,
1549
+ "byteOffset": 15601664
1550
+ },
1551
+ {
1552
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
1553
+ "shape": [
1554
+ 2048,
1555
+ 256
1556
+ ],
1557
+ "dtype": "uint32",
1558
+ "format": "f32-to-bf16",
1559
+ "nbytes": 2097152,
1560
+ "byteOffset": 15929344
1561
+ },
1562
+ {
1563
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
1564
+ "shape": [
1565
+ 2048,
1566
+ 64
1567
+ ],
1568
+ "dtype": "float32",
1569
+ "format": "f32-to-bf16",
1570
+ "nbytes": 262144,
1571
+ "byteOffset": 18026496
1572
+ },
1573
+ {
1574
+ "name": "model.layers.20.input_layernorm.weight",
1575
+ "shape": [
1576
+ 2048
1577
+ ],
1578
+ "dtype": "float32",
1579
+ "format": "f32-to-bf16",
1580
+ "nbytes": 4096,
1581
+ "byteOffset": 18288640
1582
+ },
1583
+ {
1584
+ "name": "model.layers.20.mlp.down_proj.q_weight",
1585
+ "shape": [
1586
+ 2048,
1587
+ 704
1588
+ ],
1589
+ "dtype": "uint32",
1590
+ "format": "f32-to-bf16",
1591
+ "nbytes": 5767168,
1592
+ "byteOffset": 18292736
1593
+ },
1594
+ {
1595
+ "name": "model.layers.20.mlp.down_proj.q_scale",
1596
+ "shape": [
1597
+ 2048,
1598
+ 176
1599
+ ],
1600
+ "dtype": "float32",
1601
+ "format": "f32-to-bf16",
1602
+ "nbytes": 720896,
1603
+ "byteOffset": 24059904
1604
+ }
1605
+ ],
1606
+ "md5sum": "f87ee6617b7866df37b8c66980d7144f"
1607
+ },
1608
+ {
1609
+ "dataPath": "params_shard_15.bin",
1610
+ "format": "raw-shard",
1611
+ "nbytes": 24780800,
1612
+ "records": [
1613
+ {
1614
+ "name": "model.layers.20.mlp.gate_up_proj.q_weight",
1615
+ "shape": [
1616
+ 11264,
1617
+ 256
1618
+ ],
1619
+ "dtype": "uint32",
1620
+ "format": "f32-to-bf16",
1621
+ "nbytes": 11534336,
1622
+ "byteOffset": 0
1623
+ },
1624
+ {
1625
+ "name": "model.layers.20.mlp.gate_up_proj.q_scale",
1626
+ "shape": [
1627
+ 11264,
1628
+ 64
1629
+ ],
1630
+ "dtype": "float32",
1631
+ "format": "f32-to-bf16",
1632
+ "nbytes": 1441792,
1633
+ "byteOffset": 11534336
1634
+ },
1635
+ {
1636
+ "name": "model.layers.20.post_attention_layernorm.weight",
1637
+ "shape": [
1638
+ 2048
1639
+ ],
1640
+ "dtype": "float32",
1641
+ "format": "f32-to-bf16",
1642
+ "nbytes": 4096,
1643
+ "byteOffset": 12976128
1644
+ },
1645
+ {
1646
+ "name": "model.layers.20.self_attn.qkv_proj.q_weight",
1647
+ "shape": [
1648
+ 2560,
1649
+ 256
1650
+ ],
1651
+ "dtype": "uint32",
1652
+ "format": "f32-to-bf16",
1653
+ "nbytes": 2621440,
1654
+ "byteOffset": 12980224
1655
+ },
1656
+ {
1657
+ "name": "model.layers.20.self_attn.qkv_proj.q_scale",
1658
+ "shape": [
1659
+ 2560,
1660
+ 64
1661
+ ],
1662
+ "dtype": "float32",
1663
+ "format": "f32-to-bf16",
1664
+ "nbytes": 327680,
1665
+ "byteOffset": 15601664
1666
+ },
1667
+ {
1668
+ "name": "model.layers.20.self_attn.o_proj.q_weight",
1669
+ "shape": [
1670
+ 2048,
1671
+ 256
1672
+ ],
1673
+ "dtype": "uint32",
1674
+ "format": "f32-to-bf16",
1675
+ "nbytes": 2097152,
1676
+ "byteOffset": 15929344
1677
+ },
1678
+ {
1679
+ "name": "model.layers.20.self_attn.o_proj.q_scale",
1680
+ "shape": [
1681
+ 2048,
1682
+ 64
1683
+ ],
1684
+ "dtype": "float32",
1685
+ "format": "f32-to-bf16",
1686
+ "nbytes": 262144,
1687
+ "byteOffset": 18026496
1688
+ },
1689
+ {
1690
+ "name": "model.layers.21.input_layernorm.weight",
1691
+ "shape": [
1692
+ 2048
1693
+ ],
1694
+ "dtype": "float32",
1695
+ "format": "f32-to-bf16",
1696
+ "nbytes": 4096,
1697
+ "byteOffset": 18288640
1698
+ },
1699
+ {
1700
+ "name": "model.layers.21.mlp.down_proj.q_weight",
1701
+ "shape": [
1702
+ 2048,
1703
+ 704
1704
+ ],
1705
+ "dtype": "uint32",
1706
+ "format": "f32-to-bf16",
1707
+ "nbytes": 5767168,
1708
+ "byteOffset": 18292736
1709
+ },
1710
+ {
1711
+ "name": "model.layers.21.mlp.down_proj.q_scale",
1712
+ "shape": [
1713
+ 2048,
1714
+ 176
1715
+ ],
1716
+ "dtype": "float32",
1717
+ "format": "f32-to-bf16",
1718
+ "nbytes": 720896,
1719
+ "byteOffset": 24059904
1720
+ }
1721
+ ],
1722
+ "md5sum": "b1864fe4abf3244367e8fedd502eb8ca"
1723
+ },
1724
+ {
1725
+ "dataPath": "params_shard_16.bin",
1726
+ "format": "raw-shard",
1727
+ "nbytes": 24780800,
1728
+ "records": [
1729
+ {
1730
+ "name": "model.layers.21.mlp.gate_up_proj.q_weight",
1731
+ "shape": [
1732
+ 11264,
1733
+ 256
1734
+ ],
1735
+ "dtype": "uint32",
1736
+ "format": "f32-to-bf16",
1737
+ "nbytes": 11534336,
1738
+ "byteOffset": 0
1739
+ },
1740
+ {
1741
+ "name": "model.layers.21.mlp.gate_up_proj.q_scale",
1742
+ "shape": [
1743
+ 11264,
1744
+ 64
1745
+ ],
1746
+ "dtype": "float32",
1747
+ "format": "f32-to-bf16",
1748
+ "nbytes": 1441792,
1749
+ "byteOffset": 11534336
1750
+ },
1751
+ {
1752
+ "name": "model.layers.21.post_attention_layernorm.weight",
1753
+ "shape": [
1754
+ 2048
1755
+ ],
1756
+ "dtype": "float32",
1757
+ "format": "f32-to-bf16",
1758
+ "nbytes": 4096,
1759
+ "byteOffset": 12976128
1760
+ },
1761
+ {
1762
+ "name": "model.layers.21.self_attn.qkv_proj.q_weight",
1763
+ "shape": [
1764
+ 2560,
1765
+ 256
1766
+ ],
1767
+ "dtype": "uint32",
1768
+ "format": "f32-to-bf16",
1769
+ "nbytes": 2621440,
1770
+ "byteOffset": 12980224
1771
+ },
1772
+ {
1773
+ "name": "model.layers.21.self_attn.qkv_proj.q_scale",
1774
+ "shape": [
1775
+ 2560,
1776
+ 64
1777
+ ],
1778
+ "dtype": "float32",
1779
+ "format": "f32-to-bf16",
1780
+ "nbytes": 327680,
1781
+ "byteOffset": 15601664
1782
+ },
1783
+ {
1784
+ "name": "model.layers.21.self_attn.o_proj.q_weight",
1785
+ "shape": [
1786
+ 2048,
1787
+ 256
1788
+ ],
1789
+ "dtype": "uint32",
1790
+ "format": "f32-to-bf16",
1791
+ "nbytes": 2097152,
1792
+ "byteOffset": 15929344
1793
+ },
1794
+ {
1795
+ "name": "model.layers.21.self_attn.o_proj.q_scale",
1796
+ "shape": [
1797
+ 2048,
1798
+ 64
1799
+ ],
1800
+ "dtype": "float32",
1801
+ "format": "f32-to-bf16",
1802
+ "nbytes": 262144,
1803
+ "byteOffset": 18026496
1804
+ },
1805
+ {
1806
+ "name": "model.layers.3.input_layernorm.weight",
1807
+ "shape": [
1808
+ 2048
1809
+ ],
1810
+ "dtype": "float32",
1811
+ "format": "f32-to-bf16",
1812
+ "nbytes": 4096,
1813
+ "byteOffset": 18288640
1814
+ },
1815
+ {
1816
+ "name": "model.layers.3.mlp.down_proj.q_weight",
1817
+ "shape": [
1818
+ 2048,
1819
+ 704
1820
+ ],
1821
+ "dtype": "uint32",
1822
+ "format": "f32-to-bf16",
1823
+ "nbytes": 5767168,
1824
+ "byteOffset": 18292736
1825
+ },
1826
+ {
1827
+ "name": "model.layers.3.mlp.down_proj.q_scale",
1828
+ "shape": [
1829
+ 2048,
1830
+ 176
1831
+ ],
1832
+ "dtype": "float32",
1833
+ "format": "f32-to-bf16",
1834
+ "nbytes": 720896,
1835
+ "byteOffset": 24059904
1836
+ }
1837
+ ],
1838
+ "md5sum": "505cab1c1b2228b83fd2c1d2240f0959"
1839
+ },
1840
+ {
1841
+ "dataPath": "params_shard_17.bin",
1842
+ "format": "raw-shard",
1843
+ "nbytes": 24780800,
1844
+ "records": [
1845
+ {
1846
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
1847
+ "shape": [
1848
+ 11264,
1849
+ 256
1850
+ ],
1851
+ "dtype": "uint32",
1852
+ "format": "f32-to-bf16",
1853
+ "nbytes": 11534336,
1854
+ "byteOffset": 0
1855
+ },
1856
+ {
1857
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
1858
+ "shape": [
1859
+ 11264,
1860
+ 64
1861
+ ],
1862
+ "dtype": "float32",
1863
+ "format": "f32-to-bf16",
1864
+ "nbytes": 1441792,
1865
+ "byteOffset": 11534336
1866
+ },
1867
+ {
1868
+ "name": "model.layers.3.post_attention_layernorm.weight",
1869
+ "shape": [
1870
+ 2048
1871
+ ],
1872
+ "dtype": "float32",
1873
+ "format": "f32-to-bf16",
1874
+ "nbytes": 4096,
1875
+ "byteOffset": 12976128
1876
+ },
1877
+ {
1878
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
1879
+ "shape": [
1880
+ 2560,
1881
+ 256
1882
+ ],
1883
+ "dtype": "uint32",
1884
+ "format": "f32-to-bf16",
1885
+ "nbytes": 2621440,
1886
+ "byteOffset": 12980224
1887
+ },
1888
+ {
1889
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
1890
+ "shape": [
1891
+ 2560,
1892
+ 64
1893
+ ],
1894
+ "dtype": "float32",
1895
+ "format": "f32-to-bf16",
1896
+ "nbytes": 327680,
1897
+ "byteOffset": 15601664
1898
+ },
1899
+ {
1900
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
1901
+ "shape": [
1902
+ 2048,
1903
+ 256
1904
+ ],
1905
+ "dtype": "uint32",
1906
+ "format": "f32-to-bf16",
1907
+ "nbytes": 2097152,
1908
+ "byteOffset": 15929344
1909
+ },
1910
+ {
1911
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
1912
+ "shape": [
1913
+ 2048,
1914
+ 64
1915
+ ],
1916
+ "dtype": "float32",
1917
+ "format": "f32-to-bf16",
1918
+ "nbytes": 262144,
1919
+ "byteOffset": 18026496
1920
+ },
1921
+ {
1922
+ "name": "model.layers.4.input_layernorm.weight",
1923
+ "shape": [
1924
+ 2048
1925
+ ],
1926
+ "dtype": "float32",
1927
+ "format": "f32-to-bf16",
1928
+ "nbytes": 4096,
1929
+ "byteOffset": 18288640
1930
+ },
1931
+ {
1932
+ "name": "model.layers.4.mlp.down_proj.q_weight",
1933
+ "shape": [
1934
+ 2048,
1935
+ 704
1936
+ ],
1937
+ "dtype": "uint32",
1938
+ "format": "f32-to-bf16",
1939
+ "nbytes": 5767168,
1940
+ "byteOffset": 18292736
1941
+ },
1942
+ {
1943
+ "name": "model.layers.4.mlp.down_proj.q_scale",
1944
+ "shape": [
1945
+ 2048,
1946
+ 176
1947
+ ],
1948
+ "dtype": "float32",
1949
+ "format": "f32-to-bf16",
1950
+ "nbytes": 720896,
1951
+ "byteOffset": 24059904
1952
+ }
1953
+ ],
1954
+ "md5sum": "a798beddbb6ccf6e8b76b51ae087c28e"
1955
+ },
1956
+ {
1957
+ "dataPath": "params_shard_18.bin",
1958
+ "format": "raw-shard",
1959
+ "nbytes": 24780800,
1960
+ "records": [
1961
+ {
1962
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
1963
+ "shape": [
1964
+ 11264,
1965
+ 256
1966
+ ],
1967
+ "dtype": "uint32",
1968
+ "format": "f32-to-bf16",
1969
+ "nbytes": 11534336,
1970
+ "byteOffset": 0
1971
+ },
1972
+ {
1973
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
1974
+ "shape": [
1975
+ 11264,
1976
+ 64
1977
+ ],
1978
+ "dtype": "float32",
1979
+ "format": "f32-to-bf16",
1980
+ "nbytes": 1441792,
1981
+ "byteOffset": 11534336
1982
+ },
1983
+ {
1984
+ "name": "model.layers.4.post_attention_layernorm.weight",
1985
+ "shape": [
1986
+ 2048
1987
+ ],
1988
+ "dtype": "float32",
1989
+ "format": "f32-to-bf16",
1990
+ "nbytes": 4096,
1991
+ "byteOffset": 12976128
1992
+ },
1993
+ {
1994
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
1995
+ "shape": [
1996
+ 2560,
1997
+ 256
1998
+ ],
1999
+ "dtype": "uint32",
2000
+ "format": "f32-to-bf16",
2001
+ "nbytes": 2621440,
2002
+ "byteOffset": 12980224
2003
+ },
2004
+ {
2005
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
2006
+ "shape": [
2007
+ 2560,
2008
+ 64
2009
+ ],
2010
+ "dtype": "float32",
2011
+ "format": "f32-to-bf16",
2012
+ "nbytes": 327680,
2013
+ "byteOffset": 15601664
2014
+ },
2015
+ {
2016
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
2017
+ "shape": [
2018
+ 2048,
2019
+ 256
2020
+ ],
2021
+ "dtype": "uint32",
2022
+ "format": "f32-to-bf16",
2023
+ "nbytes": 2097152,
2024
+ "byteOffset": 15929344
2025
+ },
2026
+ {
2027
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
2028
+ "shape": [
2029
+ 2048,
2030
+ 64
2031
+ ],
2032
+ "dtype": "float32",
2033
+ "format": "f32-to-bf16",
2034
+ "nbytes": 262144,
2035
+ "byteOffset": 18026496
2036
+ },
2037
+ {
2038
+ "name": "model.layers.5.input_layernorm.weight",
2039
+ "shape": [
2040
+ 2048
2041
+ ],
2042
+ "dtype": "float32",
2043
+ "format": "f32-to-bf16",
2044
+ "nbytes": 4096,
2045
+ "byteOffset": 18288640
2046
+ },
2047
+ {
2048
+ "name": "model.layers.5.mlp.down_proj.q_weight",
2049
+ "shape": [
2050
+ 2048,
2051
+ 704
2052
+ ],
2053
+ "dtype": "uint32",
2054
+ "format": "f32-to-bf16",
2055
+ "nbytes": 5767168,
2056
+ "byteOffset": 18292736
2057
+ },
2058
+ {
2059
+ "name": "model.layers.5.mlp.down_proj.q_scale",
2060
+ "shape": [
2061
+ 2048,
2062
+ 176
2063
+ ],
2064
+ "dtype": "float32",
2065
+ "format": "f32-to-bf16",
2066
+ "nbytes": 720896,
2067
+ "byteOffset": 24059904
2068
+ }
2069
+ ],
2070
+ "md5sum": "4fc462bba2bb7ed3004b13350e52ce3f"
2071
+ },
2072
+ {
2073
+ "dataPath": "params_shard_19.bin",
2074
+ "format": "raw-shard",
2075
+ "nbytes": 24780800,
2076
+ "records": [
2077
+ {
2078
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
2079
+ "shape": [
2080
+ 11264,
2081
+ 256
2082
+ ],
2083
+ "dtype": "uint32",
2084
+ "format": "f32-to-bf16",
2085
+ "nbytes": 11534336,
2086
+ "byteOffset": 0
2087
+ },
2088
+ {
2089
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
2090
+ "shape": [
2091
+ 11264,
2092
+ 64
2093
+ ],
2094
+ "dtype": "float32",
2095
+ "format": "f32-to-bf16",
2096
+ "nbytes": 1441792,
2097
+ "byteOffset": 11534336
2098
+ },
2099
+ {
2100
+ "name": "model.layers.5.post_attention_layernorm.weight",
2101
+ "shape": [
2102
+ 2048
2103
+ ],
2104
+ "dtype": "float32",
2105
+ "format": "f32-to-bf16",
2106
+ "nbytes": 4096,
2107
+ "byteOffset": 12976128
2108
+ },
2109
+ {
2110
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
2111
+ "shape": [
2112
+ 2560,
2113
+ 256
2114
+ ],
2115
+ "dtype": "uint32",
2116
+ "format": "f32-to-bf16",
2117
+ "nbytes": 2621440,
2118
+ "byteOffset": 12980224
2119
+ },
2120
+ {
2121
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
2122
+ "shape": [
2123
+ 2560,
2124
+ 64
2125
+ ],
2126
+ "dtype": "float32",
2127
+ "format": "f32-to-bf16",
2128
+ "nbytes": 327680,
2129
+ "byteOffset": 15601664
2130
+ },
2131
+ {
2132
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
2133
+ "shape": [
2134
+ 2048,
2135
+ 256
2136
+ ],
2137
+ "dtype": "uint32",
2138
+ "format": "f32-to-bf16",
2139
+ "nbytes": 2097152,
2140
+ "byteOffset": 15929344
2141
+ },
2142
+ {
2143
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
2144
+ "shape": [
2145
+ 2048,
2146
+ 64
2147
+ ],
2148
+ "dtype": "float32",
2149
+ "format": "f32-to-bf16",
2150
+ "nbytes": 262144,
2151
+ "byteOffset": 18026496
2152
+ },
2153
+ {
2154
+ "name": "model.layers.6.input_layernorm.weight",
2155
+ "shape": [
2156
+ 2048
2157
+ ],
2158
+ "dtype": "float32",
2159
+ "format": "f32-to-bf16",
2160
+ "nbytes": 4096,
2161
+ "byteOffset": 18288640
2162
+ },
2163
+ {
2164
+ "name": "model.layers.6.mlp.down_proj.q_weight",
2165
+ "shape": [
2166
+ 2048,
2167
+ 704
2168
+ ],
2169
+ "dtype": "uint32",
2170
+ "format": "f32-to-bf16",
2171
+ "nbytes": 5767168,
2172
+ "byteOffset": 18292736
2173
+ },
2174
+ {
2175
+ "name": "model.layers.6.mlp.down_proj.q_scale",
2176
+ "shape": [
2177
+ 2048,
2178
+ 176
2179
+ ],
2180
+ "dtype": "float32",
2181
+ "format": "f32-to-bf16",
2182
+ "nbytes": 720896,
2183
+ "byteOffset": 24059904
2184
+ }
2185
+ ],
2186
+ "md5sum": "6f0b3e5f9da341272077d653a0983dfe"
2187
+ },
2188
+ {
2189
+ "dataPath": "params_shard_20.bin",
2190
+ "format": "raw-shard",
2191
+ "nbytes": 24780800,
2192
+ "records": [
2193
+ {
2194
+ "name": "model.layers.6.mlp.gate_up_proj.q_weight",
2195
+ "shape": [
2196
+ 11264,
2197
+ 256
2198
+ ],
2199
+ "dtype": "uint32",
2200
+ "format": "f32-to-bf16",
2201
+ "nbytes": 11534336,
2202
+ "byteOffset": 0
2203
+ },
2204
+ {
2205
+ "name": "model.layers.6.mlp.gate_up_proj.q_scale",
2206
+ "shape": [
2207
+ 11264,
2208
+ 64
2209
+ ],
2210
+ "dtype": "float32",
2211
+ "format": "f32-to-bf16",
2212
+ "nbytes": 1441792,
2213
+ "byteOffset": 11534336
2214
+ },
2215
+ {
2216
+ "name": "model.layers.6.post_attention_layernorm.weight",
2217
+ "shape": [
2218
+ 2048
2219
+ ],
2220
+ "dtype": "float32",
2221
+ "format": "f32-to-bf16",
2222
+ "nbytes": 4096,
2223
+ "byteOffset": 12976128
2224
+ },
2225
+ {
2226
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
2227
+ "shape": [
2228
+ 2560,
2229
+ 256
2230
+ ],
2231
+ "dtype": "uint32",
2232
+ "format": "f32-to-bf16",
2233
+ "nbytes": 2621440,
2234
+ "byteOffset": 12980224
2235
+ },
2236
+ {
2237
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
2238
+ "shape": [
2239
+ 2560,
2240
+ 64
2241
+ ],
2242
+ "dtype": "float32",
2243
+ "format": "f32-to-bf16",
2244
+ "nbytes": 327680,
2245
+ "byteOffset": 15601664
2246
+ },
2247
+ {
2248
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
2249
+ "shape": [
2250
+ 2048,
2251
+ 256
2252
+ ],
2253
+ "dtype": "uint32",
2254
+ "format": "f32-to-bf16",
2255
+ "nbytes": 2097152,
2256
+ "byteOffset": 15929344
2257
+ },
2258
+ {
2259
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
2260
+ "shape": [
2261
+ 2048,
2262
+ 64
2263
+ ],
2264
+ "dtype": "float32",
2265
+ "format": "f32-to-bf16",
2266
+ "nbytes": 262144,
2267
+ "byteOffset": 18026496
2268
+ },
2269
+ {
2270
+ "name": "model.layers.7.input_layernorm.weight",
2271
+ "shape": [
2272
+ 2048
2273
+ ],
2274
+ "dtype": "float32",
2275
+ "format": "f32-to-bf16",
2276
+ "nbytes": 4096,
2277
+ "byteOffset": 18288640
2278
+ },
2279
+ {
2280
+ "name": "model.layers.7.mlp.down_proj.q_weight",
2281
+ "shape": [
2282
+ 2048,
2283
+ 704
2284
+ ],
2285
+ "dtype": "uint32",
2286
+ "format": "f32-to-bf16",
2287
+ "nbytes": 5767168,
2288
+ "byteOffset": 18292736
2289
+ },
2290
+ {
2291
+ "name": "model.layers.7.mlp.down_proj.q_scale",
2292
+ "shape": [
2293
+ 2048,
2294
+ 176
2295
+ ],
2296
+ "dtype": "float32",
2297
+ "format": "f32-to-bf16",
2298
+ "nbytes": 720896,
2299
+ "byteOffset": 24059904
2300
+ }
2301
+ ],
2302
+ "md5sum": "686b2f5cffad40c9ede9515e7d23237e"
2303
+ },
2304
+ {
2305
+ "dataPath": "params_shard_21.bin",
2306
+ "format": "raw-shard",
2307
+ "nbytes": 24780800,
2308
+ "records": [
2309
+ {
2310
+ "name": "model.layers.7.mlp.gate_up_proj.q_weight",
2311
+ "shape": [
2312
+ 11264,
2313
+ 256
2314
+ ],
2315
+ "dtype": "uint32",
2316
+ "format": "f32-to-bf16",
2317
+ "nbytes": 11534336,
2318
+ "byteOffset": 0
2319
+ },
2320
+ {
2321
+ "name": "model.layers.7.mlp.gate_up_proj.q_scale",
2322
+ "shape": [
2323
+ 11264,
2324
+ 64
2325
+ ],
2326
+ "dtype": "float32",
2327
+ "format": "f32-to-bf16",
2328
+ "nbytes": 1441792,
2329
+ "byteOffset": 11534336
2330
+ },
2331
+ {
2332
+ "name": "model.layers.7.post_attention_layernorm.weight",
2333
+ "shape": [
2334
+ 2048
2335
+ ],
2336
+ "dtype": "float32",
2337
+ "format": "f32-to-bf16",
2338
+ "nbytes": 4096,
2339
+ "byteOffset": 12976128
2340
+ },
2341
+ {
2342
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
2343
+ "shape": [
2344
+ 2560,
2345
+ 256
2346
+ ],
2347
+ "dtype": "uint32",
2348
+ "format": "f32-to-bf16",
2349
+ "nbytes": 2621440,
2350
+ "byteOffset": 12980224
2351
+ },
2352
+ {
2353
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
2354
+ "shape": [
2355
+ 2560,
2356
+ 64
2357
+ ],
2358
+ "dtype": "float32",
2359
+ "format": "f32-to-bf16",
2360
+ "nbytes": 327680,
2361
+ "byteOffset": 15601664
2362
+ },
2363
+ {
2364
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
2365
+ "shape": [
2366
+ 2048,
2367
+ 256
2368
+ ],
2369
+ "dtype": "uint32",
2370
+ "format": "f32-to-bf16",
2371
+ "nbytes": 2097152,
2372
+ "byteOffset": 15929344
2373
+ },
2374
+ {
2375
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
2376
+ "shape": [
2377
+ 2048,
2378
+ 64
2379
+ ],
2380
+ "dtype": "float32",
2381
+ "format": "f32-to-bf16",
2382
+ "nbytes": 262144,
2383
+ "byteOffset": 18026496
2384
+ },
2385
+ {
2386
+ "name": "model.layers.8.input_layernorm.weight",
2387
+ "shape": [
2388
+ 2048
2389
+ ],
2390
+ "dtype": "float32",
2391
+ "format": "f32-to-bf16",
2392
+ "nbytes": 4096,
2393
+ "byteOffset": 18288640
2394
+ },
2395
+ {
2396
+ "name": "model.layers.8.mlp.down_proj.q_weight",
2397
+ "shape": [
2398
+ 2048,
2399
+ 704
2400
+ ],
2401
+ "dtype": "uint32",
2402
+ "format": "f32-to-bf16",
2403
+ "nbytes": 5767168,
2404
+ "byteOffset": 18292736
2405
+ },
2406
+ {
2407
+ "name": "model.layers.8.mlp.down_proj.q_scale",
2408
+ "shape": [
2409
+ 2048,
2410
+ 176
2411
+ ],
2412
+ "dtype": "float32",
2413
+ "format": "f32-to-bf16",
2414
+ "nbytes": 720896,
2415
+ "byteOffset": 24059904
2416
+ }
2417
+ ],
2418
+ "md5sum": "686faea393d9673351ea835a31d58d1c"
2419
+ },
2420
+ {
2421
+ "dataPath": "params_shard_22.bin",
2422
+ "format": "raw-shard",
2423
+ "nbytes": 24780800,
2424
+ "records": [
2425
+ {
2426
+ "name": "model.layers.8.mlp.gate_up_proj.q_weight",
2427
+ "shape": [
2428
+ 11264,
2429
+ 256
2430
+ ],
2431
+ "dtype": "uint32",
2432
+ "format": "f32-to-bf16",
2433
+ "nbytes": 11534336,
2434
+ "byteOffset": 0
2435
+ },
2436
+ {
2437
+ "name": "model.layers.8.mlp.gate_up_proj.q_scale",
2438
+ "shape": [
2439
+ 11264,
2440
+ 64
2441
+ ],
2442
+ "dtype": "float32",
2443
+ "format": "f32-to-bf16",
2444
+ "nbytes": 1441792,
2445
+ "byteOffset": 11534336
2446
+ },
2447
+ {
2448
+ "name": "model.layers.8.post_attention_layernorm.weight",
2449
+ "shape": [
2450
+ 2048
2451
+ ],
2452
+ "dtype": "float32",
2453
+ "format": "f32-to-bf16",
2454
+ "nbytes": 4096,
2455
+ "byteOffset": 12976128
2456
+ },
2457
+ {
2458
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
2459
+ "shape": [
2460
+ 2560,
2461
+ 256
2462
+ ],
2463
+ "dtype": "uint32",
2464
+ "format": "f32-to-bf16",
2465
+ "nbytes": 2621440,
2466
+ "byteOffset": 12980224
2467
+ },
2468
+ {
2469
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
2470
+ "shape": [
2471
+ 2560,
2472
+ 64
2473
+ ],
2474
+ "dtype": "float32",
2475
+ "format": "f32-to-bf16",
2476
+ "nbytes": 327680,
2477
+ "byteOffset": 15601664
2478
+ },
2479
+ {
2480
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
2481
+ "shape": [
2482
+ 2048,
2483
+ 256
2484
+ ],
2485
+ "dtype": "uint32",
2486
+ "format": "f32-to-bf16",
2487
+ "nbytes": 2097152,
2488
+ "byteOffset": 15929344
2489
+ },
2490
+ {
2491
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
2492
+ "shape": [
2493
+ 2048,
2494
+ 64
2495
+ ],
2496
+ "dtype": "float32",
2497
+ "format": "f32-to-bf16",
2498
+ "nbytes": 262144,
2499
+ "byteOffset": 18026496
2500
+ },
2501
+ {
2502
+ "name": "model.layers.9.input_layernorm.weight",
2503
+ "shape": [
2504
+ 2048
2505
+ ],
2506
+ "dtype": "float32",
2507
+ "format": "f32-to-bf16",
2508
+ "nbytes": 4096,
2509
+ "byteOffset": 18288640
2510
+ },
2511
+ {
2512
+ "name": "model.layers.9.mlp.down_proj.q_weight",
2513
+ "shape": [
2514
+ 2048,
2515
+ 704
2516
+ ],
2517
+ "dtype": "uint32",
2518
+ "format": "f32-to-bf16",
2519
+ "nbytes": 5767168,
2520
+ "byteOffset": 18292736
2521
+ },
2522
+ {
2523
+ "name": "model.layers.9.mlp.down_proj.q_scale",
2524
+ "shape": [
2525
+ 2048,
2526
+ 176
2527
+ ],
2528
+ "dtype": "float32",
2529
+ "format": "f32-to-bf16",
2530
+ "nbytes": 720896,
2531
+ "byteOffset": 24059904
2532
+ }
2533
+ ],
2534
+ "md5sum": "5a11a5b68f457af4ae8df8926c2c02ff"
2535
+ },
2536
+ {
2537
+ "dataPath": "params_shard_23.bin",
2538
+ "format": "raw-shard",
2539
+ "nbytes": 18292736,
2540
+ "records": [
2541
+ {
2542
+ "name": "model.layers.9.mlp.gate_up_proj.q_weight",
2543
+ "shape": [
2544
+ 11264,
2545
+ 256
2546
+ ],
2547
+ "dtype": "uint32",
2548
+ "format": "f32-to-bf16",
2549
+ "nbytes": 11534336,
2550
+ "byteOffset": 0
2551
+ },
2552
+ {
2553
+ "name": "model.layers.9.mlp.gate_up_proj.q_scale",
2554
+ "shape": [
2555
+ 11264,
2556
+ 64
2557
+ ],
2558
+ "dtype": "float32",
2559
+ "format": "f32-to-bf16",
2560
+ "nbytes": 1441792,
2561
+ "byteOffset": 11534336
2562
+ },
2563
+ {
2564
+ "name": "model.layers.9.post_attention_layernorm.weight",
2565
+ "shape": [
2566
+ 2048
2567
+ ],
2568
+ "dtype": "float32",
2569
+ "format": "f32-to-bf16",
2570
+ "nbytes": 4096,
2571
+ "byteOffset": 12976128
2572
+ },
2573
+ {
2574
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
2575
+ "shape": [
2576
+ 2560,
2577
+ 256
2578
+ ],
2579
+ "dtype": "uint32",
2580
+ "format": "f32-to-bf16",
2581
+ "nbytes": 2621440,
2582
+ "byteOffset": 12980224
2583
+ },
2584
+ {
2585
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
2586
+ "shape": [
2587
+ 2560,
2588
+ 64
2589
+ ],
2590
+ "dtype": "float32",
2591
+ "format": "f32-to-bf16",
2592
+ "nbytes": 327680,
2593
+ "byteOffset": 15601664
2594
+ },
2595
+ {
2596
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
2597
+ "shape": [
2598
+ 2048,
2599
+ 256
2600
+ ],
2601
+ "dtype": "uint32",
2602
+ "format": "f32-to-bf16",
2603
+ "nbytes": 2097152,
2604
+ "byteOffset": 15929344
2605
+ },
2606
+ {
2607
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
2608
+ "shape": [
2609
+ 2048,
2610
+ 64
2611
+ ],
2612
+ "dtype": "float32",
2613
+ "format": "f32-to-bf16",
2614
+ "nbytes": 262144,
2615
+ "byteOffset": 18026496
2616
+ },
2617
+ {
2618
+ "name": "model.norm.weight",
2619
+ "shape": [
2620
+ 2048
2621
+ ],
2622
+ "dtype": "float32",
2623
+ "format": "f32-to-bf16",
2624
+ "nbytes": 4096,
2625
+ "byteOffset": 18288640
2626
+ }
2627
+ ],
2628
+ "md5sum": "83f96e3340c8f435412897a17003888e"
2629
+ }
2630
+ ]
2631
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d3bad64c8100b0c52883e60e30dcd881fcefa6645e97c59063501e08faee552
3
+ size 32768000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:717885f20831eea8e64c31ac705354828f285ac0baaebb24fe4b7d29449eab47
3
+ size 32768000
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:246789b20d4672f9c43545325d688becf849bd036640e016af021c9f3e04393c
3
+ size 24780800
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0494f25f78e5d7e570ee3d2f9ecb2f78a535c6113eccc7bb71ff55d90b61371a
3
+ size 24780800
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13036dd9822369d3cdce4adead967b2aca6d2299c785253133e98ff37520f631
3
+ size 24780800
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d644489b58d5c35c74232133e2209a396aedef6b55999bad3a991051436a4094
3
+ size 24780800
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dc43a7af68bb20e7ede6f59575489ae36eb9c1322f59e8a7b67462fc869109a
3
+ size 24780800
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73a6f9d55ada388a565294b1fb934ba816ad35cc5d0ff75a31e20e5bfd17ff5f
3
+ size 24780800
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8157502cddcfba1f0dbb55d3d2d2b62729b84346cabfc0e60ac70c650675aa09
3
+ size 24780800
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4735f98a0cf6266bb7e0b899338be6c050370e2017f27ec489362446c44d8da5
3
+ size 24780800
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b60c99668a7180729c5ee51beb4ab67d390feda1f7047f9637db1a051edd6c86
3
+ size 24780800
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62b1a25c22bcf96213a32ec90268cc5c588444b4408cf1070eb357e9749c7929
3
+ size 24780800
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a80ebf330d101f10851b63818bdddf67db5d683a3dee564dc70738756c8dc74
3
+ size 32976896
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecdfa79b4fb125356e6d7485eae4cc1d7b785ec6afd17776533893d820e77dc1
3
+ size 24780800
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3efe47d0c09393ae9dea3b75b18e4d7cb6a679e7cc72f5e88dc0672b50d41b6
3
+ size 24780800
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7102f63b0e8993992783465a175c72f87fbf3c10747ac1632feb5bc8036b079d
3
+ size 24780800
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:368585032efc3cc0c6467e70c63d200b48da3cfbb2962391eeb567116985cd28
3
+ size 18292736
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64b60ae5545af4a1d706c983020ff898b30b8e44c0fb9694d22a16723e1db0e3
3
+ size 31268864
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22674cb033c36301ced8a514c2e1c6614f55d52bc195927dae2c93478f8de0af
3
+ size 24780800
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eaecc43a8b233fefca2fefdfa818887948707417e9d739ab7f3b95cb47066fa
3
+ size 24780800
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7223f832e3b068a95de71f7c5eb295550e31369f7274ab42c5effc8bca8d1dbf
3
+ size 24780800
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aab378797c3eff5859636e85702f4eca4c0c081e89d2ac9f4e8f8aa7730d5528
3
+ size 24780800
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c857ac90783c9f041609835f618c6c99025cd5c401296f9ff830a459360c4470
3
+ size 24780800
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64fb227256a0fa148d16190afa67c5ac1d29c5aa69c8f733c9289a95562558d6
3
+ size 24780800
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "bos_token": "<s>",
29
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
30
+ "clean_up_tokenization_spaces": false,
31
+ "eos_token": "</s>",
32
+ "legacy": false,
33
+ "model_max_length": 2048,
34
+ "pad_token": "</s>",
35
+ "padding_side": "right",
36
+ "sp_model_kwargs": {},
37
+ "tokenizer_class": "LlamaTokenizer",
38
+ "unk_token": "<unk>",
39
+ "use_default_system_prompt": false
40
+ }