kartmannXu's picture
Upload 33 files
3696187 verified
{
"metadata": {
"ParamSize": 405,
"ParamBytes": 957816576.0,
"BitsPerParam": 3.6548301671253403
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 86423040,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
122760,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 86423040,
"byteOffset": 0
}
],
"md5sum": "ead4888aead1362100efb31d0ab2d60a"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 33536448,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
122760,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10802880,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 10802880
},
{
"name": "model.layers.0.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 14452416
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 14908608
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 16125120
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 16277184
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 24387264
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 25401024
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 29382336
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 29880000
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 29883456
},
{
"name": "model.layers.1.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 29886912
}
],
"md5sum": "e967cf9256473129d0dc331b8f6f34c5"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 30032640,
"records": [
{
"name": "model.layers.1.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 456192
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 1672704
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 1824768
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 9934848
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 10948608
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 14929920
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 15427584
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 15431040
},
{
"name": "model.layers.2.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 15434496
},
{
"name": "model.layers.2.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 19084032
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 19540224
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 20756736
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 20908800
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 29018880
}
],
"md5sum": "3d2c55052080dbda3b596b94f9c9d312"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29044224,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 3981312
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 4478976
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 4482432
},
{
"name": "model.layers.3.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 4485888
},
{
"name": "model.layers.3.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 8135424
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 8591616
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 9808128
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 9960192
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 18070272
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 19084032
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 23065344
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 23563008
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 23566464
},
{
"name": "model.layers.4.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 23569920
},
{
"name": "model.layers.4.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 27219456
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 27675648
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 28892160
}
],
"md5sum": "422164e145615acd1bf1700e01139114"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 32693760,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 8110080
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 9123840
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 13105152
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13602816
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13606272
},
{
"name": "model.layers.5.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 13609728
},
{
"name": "model.layers.5.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 17259264
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 17715456
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 18931968
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 19084032
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 27194112
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 28207872
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 32189184
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32686848
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32690304
}
],
"md5sum": "62d69559bfdb0e59c3351f451d5472a7"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 32668416,
"records": [
{
"name": "model.layers.6.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 3649536
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 4105728
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 5322240
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 5474304
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 13584384
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 14598144
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 18579456
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19077120
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19080576
},
{
"name": "model.layers.7.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 19084032
},
{
"name": "model.layers.7.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 22733568
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 23189760
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 24406272
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 24558336
}
],
"md5sum": "572fd97aa4ee48b4ae43a38e65df7033"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 30057984,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 1013760
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 4995072
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5492736
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5496192
},
{
"name": "model.layers.8.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 5499648
},
{
"name": "model.layers.8.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 9149184
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 9605376
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 10821888
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 10973952
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 19084032
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 20097792
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 24079104
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24576768
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24580224
},
{
"name": "model.layers.9.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 24583680
},
{
"name": "model.layers.9.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 28233216
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 28689408
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 29905920
}
],
"md5sum": "6c564776fe1bea0d4f12d6e2642717e2"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32693760,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 8110080
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 9123840
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 13105152
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13602816
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13606272
},
{
"name": "model.layers.10.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 13609728
},
{
"name": "model.layers.10.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 17259264
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 17715456
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 18931968
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 19084032
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 27194112
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 28207872
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 32189184
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32686848
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32690304
}
],
"md5sum": "3c36ee273d06831df6ceb3a98d5d858b"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 32668416,
"records": [
{
"name": "model.layers.11.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 3649536
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 4105728
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 5322240
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 5474304
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 13584384
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 14598144
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 18579456
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19077120
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19080576
},
{
"name": "model.layers.12.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 19084032
},
{
"name": "model.layers.12.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 22733568
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 23189760
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 24406272
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 24558336
}
],
"md5sum": "b254a9b9e62dfb03b8d943132c6e5373"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 30057984,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 1013760
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 4995072
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5492736
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5496192
},
{
"name": "model.layers.13.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 5499648
},
{
"name": "model.layers.13.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 9149184
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 9605376
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 10821888
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 10973952
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 19084032
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 20097792
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 24079104
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24576768
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24580224
},
{
"name": "model.layers.14.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 24583680
},
{
"name": "model.layers.14.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 28233216
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 28689408
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 29905920
}
],
"md5sum": "4100edf62d383e3687fca6f162a0d5dd"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 32693760,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 8110080
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 9123840
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 13105152
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13602816
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13606272
},
{
"name": "model.layers.15.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 13609728
},
{
"name": "model.layers.15.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 17259264
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 17715456
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 18931968
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 19084032
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 27194112
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 28207872
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 32189184
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32686848
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32690304
}
],
"md5sum": "cc74d6db135f604cf0f57917a4f43f75"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 32668416,
"records": [
{
"name": "model.layers.16.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 3649536
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 4105728
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 5322240
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 5474304
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 13584384
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 14598144
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 18579456
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19077120
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19080576
},
{
"name": "model.layers.17.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 19084032
},
{
"name": "model.layers.17.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 22733568
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 23189760
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 24406272
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 24558336
}
],
"md5sum": "90405e5daa3ccf9c01700e58c14ca380"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 30057984,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 1013760
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 4995072
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5492736
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5496192
},
{
"name": "model.layers.18.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 5499648
},
{
"name": "model.layers.18.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 9149184
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 9605376
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 10821888
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 10973952
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 19084032
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 20097792
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 24079104
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24576768
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24580224
},
{
"name": "model.layers.19.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 24583680
},
{
"name": "model.layers.19.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 28233216
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 28689408
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 29905920
}
],
"md5sum": "5885cc444667a28be6152b4acedcff5c"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 32693760,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 8110080
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 9123840
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 13105152
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13602816
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13606272
},
{
"name": "model.layers.20.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 13609728
},
{
"name": "model.layers.20.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 17259264
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 17715456
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 18931968
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 19084032
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 27194112
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 28207872
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 32189184
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32686848
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32690304
}
],
"md5sum": "225425f368ad9588015a44bbd81aa487"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 32668416,
"records": [
{
"name": "model.layers.21.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 3649536
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 4105728
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 5322240
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 5474304
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 13584384
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 14598144
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 18579456
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19077120
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19080576
},
{
"name": "model.layers.22.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 19084032
},
{
"name": "model.layers.22.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 22733568
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 23189760
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 24406272
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 24558336
}
],
"md5sum": "34cf7294e27f2bed1f2d0295fd4fd1cc"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 30057984,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 1013760
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 4995072
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5492736
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5496192
},
{
"name": "model.layers.23.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 5499648
},
{
"name": "model.layers.23.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 9149184
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 9605376
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 10821888
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 10973952
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 19084032
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 20097792
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 24079104
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24576768
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24580224
},
{
"name": "model.layers.24.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 24583680
},
{
"name": "model.layers.24.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 28233216
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 28689408
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 29905920
}
],
"md5sum": "68c5da0a7d055f7ead2151e1ad6c6e2e"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 32693760,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 8110080
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 9123840
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 13105152
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13602816
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13606272
},
{
"name": "model.layers.25.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 13609728
},
{
"name": "model.layers.25.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 17259264
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 17715456
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 18931968
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 19084032
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 27194112
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 28207872
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 32189184
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32686848
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32690304
}
],
"md5sum": "61678edaed6689a45e843be1aab67674"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 32668416,
"records": [
{
"name": "model.layers.26.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 3649536
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 4105728
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 5322240
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 5474304
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 13584384
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 14598144
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 18579456
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19077120
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19080576
},
{
"name": "model.layers.27.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 19084032
},
{
"name": "model.layers.27.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 22733568
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 23189760
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 24406272
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 24558336
}
],
"md5sum": "4cffc5bfd1ddc9286e3e0978b6f14616"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 30057984,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 1013760
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 4995072
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5492736
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5496192
},
{
"name": "model.layers.28.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 5499648
},
{
"name": "model.layers.28.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 9149184
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 9605376
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 10821888
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 10973952
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 19084032
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 20097792
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 24079104
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24576768
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24580224
},
{
"name": "model.layers.29.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 24583680
},
{
"name": "model.layers.29.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 28233216
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 28689408
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 29905920
}
],
"md5sum": "5498d16e8feb4178807f85e91bf4337d"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 32693760,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 8110080
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 9123840
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 13105152
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13602816
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13606272
},
{
"name": "model.layers.30.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 13609728
},
{
"name": "model.layers.30.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 17259264
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 17715456
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 18931968
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 19084032
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 27194112
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 28207872
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 32189184
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32686848
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32690304
}
],
"md5sum": "8a1b3ea33d49cc6d08c348595f701163"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 32668416,
"records": [
{
"name": "model.layers.31.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 3649536
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 4105728
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 5322240
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 5474304
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 13584384
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 14598144
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 18579456
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19077120
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19080576
},
{
"name": "model.layers.32.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 19084032
},
{
"name": "model.layers.32.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 22733568
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 23189760
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 24406272
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 24558336
}
],
"md5sum": "79a6e3e95ae83753c715ef298b267abf"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 30057984,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 1013760
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 4995072
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5492736
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5496192
},
{
"name": "model.layers.33.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 5499648
},
{
"name": "model.layers.33.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 9149184
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 9605376
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 10821888
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 10973952
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 19084032
},
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 20097792
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 24079104
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24576768
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24580224
},
{
"name": "model.layers.34.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 24583680
},
{
"name": "model.layers.34.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 28233216
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 28689408
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 29905920
}
],
"md5sum": "d37956a1f2acf924760a8fa440c7ffb5"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 32693760,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 8110080
},
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 9123840
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 13105152
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13602816
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13606272
},
{
"name": "model.layers.35.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 13609728
},
{
"name": "model.layers.35.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 17259264
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 17715456
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 18931968
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 19084032
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 27194112
},
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 28207872
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 32189184
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32686848
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 32690304
}
],
"md5sum": "fdb96d136d4f16c56cc9686404b1dfa6"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 32668416,
"records": [
{
"name": "model.layers.36.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 3649536
},
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 4105728
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 5322240
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 5474304
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 13584384
},
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 14598144
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 18579456
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19077120
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 19080576
},
{
"name": "model.layers.37.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 19084032
},
{
"name": "model.layers.37.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 22733568
},
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 23189760
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 24406272
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 24558336
}
],
"md5sum": "d21f76d4a230df8b00a61d751fc22b9e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 30057984,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 0
},
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 1013760
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 4995072
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5492736
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 5496192
},
{
"name": "model.layers.38.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 5499648
},
{
"name": "model.layers.38.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 9149184
},
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 9605376
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 10821888
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 10973952
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 19084032
},
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 20097792
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 24079104
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24576768
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 24580224
},
{
"name": "model.layers.39.self_attn.wqkv_pack.q_weight",
"shape": [
5184,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3649536,
"byteOffset": 24583680
},
{
"name": "model.layers.39.self_attn.wqkv_pack.q_scale",
"shape": [
5184,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 456192,
"byteOffset": 28233216
},
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
1728,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1216512,
"byteOffset": 28689408
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
1728,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 152064,
"byteOffset": 29905920
}
],
"md5sum": "4d1f6e9c6c6f971c296b4591c171f34e"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 86423040,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
122760,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 86423040,
"byteOffset": 0
}
],
"md5sum": "9d9ab3f1728a1df243d862d114dd5218"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 24416064,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
11520,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8110080,
"byteOffset": 0
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
11520,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1013760,
"byteOffset": 8110080
},
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
1728,
576
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3981312,
"byteOffset": 9123840
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
1728,
144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 497664,
"byteOffset": 13105152
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13602816
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13606272
},
{
"name": "model.norm.weight",
"shape": [
1728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3456,
"byteOffset": 13609728
},
{
"name": "lm_head.q_scale",
"shape": [
122760,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10802880,
"byteOffset": 13613184
}
],
"md5sum": "629eca0dd1f9f7dbcf78161534dadbd8"
}
]
}