|
{ |
|
"metadata": { |
|
"ParamSize": 405, |
|
"ParamBytes": 957816576.0, |
|
"BitsPerParam": 3.6548301671253403 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 86423040, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
122760, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 86423040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ead4888aead1362100efb31d0ab2d60a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33536448, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
122760, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10802880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 10802880 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 14452416 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 14908608 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 16125120 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 16277184 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 24387264 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 25401024 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 29382336 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 29880000 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 29883456 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 29886912 |
|
} |
|
], |
|
"md5sum": "e967cf9256473129d0dc331b8f6f34c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30032640, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 456192 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 1672704 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 1824768 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 9934848 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 10948608 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 14929920 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 15427584 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 15431040 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 15434496 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 19540224 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 20756736 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 20908800 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 29018880 |
|
} |
|
], |
|
"md5sum": "3d2c55052080dbda3b596b94f9c9d312" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29044224, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 3981312 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 4478976 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 4482432 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 4485888 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 8135424 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 8591616 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 9808128 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 9960192 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 18070272 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 23065344 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 23563008 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 23566464 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 23569920 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 27219456 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 27675648 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 28892160 |
|
} |
|
], |
|
"md5sum": "422164e145615acd1bf1700e01139114" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32693760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 8110080 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 9123840 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 13105152 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13602816 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13606272 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 13609728 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 17259264 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 17715456 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 18931968 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 27194112 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 28207872 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 32189184 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32686848 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32690304 |
|
} |
|
], |
|
"md5sum": "62d69559bfdb0e59c3351f451d5472a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32668416, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 3649536 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 4105728 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 5322240 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 5474304 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 13584384 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 14598144 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 18579456 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19077120 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19080576 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 22733568 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 23189760 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 24406272 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 24558336 |
|
} |
|
], |
|
"md5sum": "572fd97aa4ee48b4ae43a38e65df7033" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30057984, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 1013760 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 4995072 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5492736 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5496192 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 5499648 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 9149184 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 9605376 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 10821888 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 10973952 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 20097792 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 24079104 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24576768 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24580224 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 24583680 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 28233216 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 29905920 |
|
} |
|
], |
|
"md5sum": "6c564776fe1bea0d4f12d6e2642717e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32693760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 8110080 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 9123840 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 13105152 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13602816 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13606272 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 13609728 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 17259264 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 17715456 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 18931968 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 27194112 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 28207872 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 32189184 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32686848 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32690304 |
|
} |
|
], |
|
"md5sum": "3c36ee273d06831df6ceb3a98d5d858b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32668416, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 3649536 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 4105728 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 5322240 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 5474304 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 13584384 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 14598144 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 18579456 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19077120 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19080576 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 22733568 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 23189760 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 24406272 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 24558336 |
|
} |
|
], |
|
"md5sum": "b254a9b9e62dfb03b8d943132c6e5373" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30057984, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 1013760 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 4995072 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5492736 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5496192 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 5499648 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 9149184 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 9605376 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 10821888 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 10973952 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 20097792 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 24079104 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24576768 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24580224 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 24583680 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 28233216 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 29905920 |
|
} |
|
], |
|
"md5sum": "4100edf62d383e3687fca6f162a0d5dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32693760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 8110080 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 9123840 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 13105152 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13602816 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13606272 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 13609728 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 17259264 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 17715456 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 18931968 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 27194112 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 28207872 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 32189184 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32686848 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32690304 |
|
} |
|
], |
|
"md5sum": "cc74d6db135f604cf0f57917a4f43f75" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32668416, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 3649536 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 4105728 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 5322240 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 5474304 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 13584384 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 14598144 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 18579456 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19077120 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19080576 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 22733568 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 23189760 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 24406272 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 24558336 |
|
} |
|
], |
|
"md5sum": "90405e5daa3ccf9c01700e58c14ca380" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30057984, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 1013760 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 4995072 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5492736 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5496192 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 5499648 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 9149184 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 9605376 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 10821888 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 10973952 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 20097792 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 24079104 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24576768 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24580224 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 24583680 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 28233216 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 29905920 |
|
} |
|
], |
|
"md5sum": "5885cc444667a28be6152b4acedcff5c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32693760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 8110080 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 9123840 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 13105152 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13602816 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13606272 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 13609728 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 17259264 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 17715456 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 18931968 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 27194112 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 28207872 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 32189184 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32686848 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32690304 |
|
} |
|
], |
|
"md5sum": "225425f368ad9588015a44bbd81aa487" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32668416, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 3649536 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 4105728 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 5322240 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 5474304 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 13584384 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 14598144 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 18579456 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19077120 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19080576 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 22733568 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 23189760 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 24406272 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 24558336 |
|
} |
|
], |
|
"md5sum": "34cf7294e27f2bed1f2d0295fd4fd1cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30057984, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 1013760 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 4995072 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5492736 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5496192 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 5499648 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 9149184 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 9605376 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 10821888 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 10973952 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 20097792 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 24079104 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24576768 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24580224 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 24583680 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 28233216 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 29905920 |
|
} |
|
], |
|
"md5sum": "68c5da0a7d055f7ead2151e1ad6c6e2e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32693760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 8110080 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 9123840 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 13105152 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13602816 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13606272 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 13609728 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 17259264 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 17715456 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 18931968 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 27194112 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 28207872 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 32189184 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32686848 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32690304 |
|
} |
|
], |
|
"md5sum": "61678edaed6689a45e843be1aab67674" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32668416, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 3649536 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 4105728 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 5322240 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 5474304 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 13584384 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 14598144 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 18579456 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19077120 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19080576 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 22733568 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 23189760 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 24406272 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 24558336 |
|
} |
|
], |
|
"md5sum": "4cffc5bfd1ddc9286e3e0978b6f14616" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30057984, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 1013760 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 4995072 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5492736 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5496192 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 5499648 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 9149184 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 9605376 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 10821888 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 10973952 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 20097792 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 24079104 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24576768 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24580224 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 24583680 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 28233216 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 29905920 |
|
} |
|
], |
|
"md5sum": "5498d16e8feb4178807f85e91bf4337d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32693760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 8110080 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 9123840 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 13105152 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13602816 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13606272 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 13609728 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 17259264 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 17715456 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 18931968 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 27194112 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 28207872 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 32189184 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32686848 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32690304 |
|
} |
|
], |
|
"md5sum": "8a1b3ea33d49cc6d08c348595f701163" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32668416, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 3649536 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 4105728 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 5322240 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 5474304 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 13584384 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 14598144 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 18579456 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19077120 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19080576 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 22733568 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 23189760 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 24406272 |
|
}, |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 24558336 |
|
} |
|
], |
|
"md5sum": "79a6e3e95ae83753c715ef298b267abf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30057984, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 1013760 |
|
}, |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 4995072 |
|
}, |
|
{ |
|
"name": "model.layers.32.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5492736 |
|
}, |
|
{ |
|
"name": "model.layers.32.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5496192 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 5499648 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 9149184 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 9605376 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 10821888 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 10973952 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 20097792 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 24079104 |
|
}, |
|
{ |
|
"name": "model.layers.33.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24576768 |
|
}, |
|
{ |
|
"name": "model.layers.33.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24580224 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 24583680 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 28233216 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 29905920 |
|
} |
|
], |
|
"md5sum": "d37956a1f2acf924760a8fa440c7ffb5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32693760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 8110080 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 9123840 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 13105152 |
|
}, |
|
{ |
|
"name": "model.layers.34.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13602816 |
|
}, |
|
{ |
|
"name": "model.layers.34.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13606272 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 13609728 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 17259264 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 17715456 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 18931968 |
|
}, |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 27194112 |
|
}, |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 28207872 |
|
}, |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 32189184 |
|
}, |
|
{ |
|
"name": "model.layers.35.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32686848 |
|
}, |
|
{ |
|
"name": "model.layers.35.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 32690304 |
|
} |
|
], |
|
"md5sum": "fdb96d136d4f16c56cc9686404b1dfa6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32668416, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 3649536 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 4105728 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 5322240 |
|
}, |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 5474304 |
|
}, |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 13584384 |
|
}, |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 14598144 |
|
}, |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 18579456 |
|
}, |
|
{ |
|
"name": "model.layers.36.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19077120 |
|
}, |
|
{ |
|
"name": "model.layers.36.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 19080576 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 22733568 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 23189760 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 24406272 |
|
}, |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 24558336 |
|
} |
|
], |
|
"md5sum": "d21f76d4a230df8b00a61d751fc22b9e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30057984, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 1013760 |
|
}, |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 4995072 |
|
}, |
|
{ |
|
"name": "model.layers.37.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5492736 |
|
}, |
|
{ |
|
"name": "model.layers.37.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 5496192 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 5499648 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 9149184 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 9605376 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 10821888 |
|
}, |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 10973952 |
|
}, |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 19084032 |
|
}, |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 20097792 |
|
}, |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 24079104 |
|
}, |
|
{ |
|
"name": "model.layers.38.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24576768 |
|
}, |
|
{ |
|
"name": "model.layers.38.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 24580224 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.wqkv_pack.q_weight", |
|
"shape": [ |
|
5184, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3649536, |
|
"byteOffset": 24583680 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.wqkv_pack.q_scale", |
|
"shape": [ |
|
5184, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 456192, |
|
"byteOffset": 28233216 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1216512, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 152064, |
|
"byteOffset": 29905920 |
|
} |
|
], |
|
"md5sum": "4d1f6e9c6c6f971c296b4591c171f34e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 86423040, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
122760, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 86423040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d9ab3f1728a1df243d862d114dd5218" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
11520, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8110080, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
11520, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1013760, |
|
"byteOffset": 8110080 |
|
}, |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
576 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3981312, |
|
"byteOffset": 9123840 |
|
}, |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1728, |
|
144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 497664, |
|
"byteOffset": 13105152 |
|
}, |
|
{ |
|
"name": "model.layers.39.input_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13602816 |
|
}, |
|
{ |
|
"name": "model.layers.39.post_attention_layernorm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13606272 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
1728 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3456, |
|
"byteOffset": 13609728 |
|
}, |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
122760, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10802880, |
|
"byteOffset": 13613184 |
|
} |
|
], |
|
"md5sum": "629eca0dd1f9f7dbcf78161534dadbd8" |
|
} |
|
] |
|
} |