|
{ |
|
"metadata": { |
|
"ParamSize": 443, |
|
"ParamBytes": 19677200384.0, |
|
"BitsPerParam": 4.500110418416715 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1048576000, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
256000, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "90172f952317e27f8f3eadb8a833b298" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 131072000, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
256000, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a15f399df006c9452a99a41845042bca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5f738e519bc173b908b7cd18431c29e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d32a88c4601b719449740f9e4e0ec941" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fa29ce568dff5f9093b97edb9a4e42ee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "311ea3067bb11bd40f2fb3df606135b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "0bf412fdfd72cfe1bbe6c3e9babc5fe5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "daaa92d0e8d1c22da6c49bca7992bcdd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "52ed1c455e6231bb1d81efc8ea1ccd20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
} |
|
], |
|
"md5sum": "74bab5694c593c28b3305c6f25768503" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "32b73b10039081fcda2e8f03ead6841c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4af8ca8a3cfa630d59f3f21bbf769681" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "eeedbe99ab0d2825106928c56b3a36e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2cec07be2d0ca19654e2d98fa7eca397" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66a5b3602033eb9a585e85c9c9430520" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c45f3c013b5fa9bc1bd2dca375ac79f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 28311552 |
|
} |
|
], |
|
"md5sum": "6df0a68f5dc645329ec73c6f53dbec14" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c48b9cbad61ecfde89049ccb16ceeaca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "df885a3f12a59d21a57b271cd425450e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "8a43afbc46b4f47643b80ec2f270aea9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a89395d538137c2626af121c3e309327" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c0225bde45c596864953614f93c7c4b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08b2f4ac06f8ae1ec6f75acf158bb21f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28311552, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
} |
|
], |
|
"md5sum": "78cd23ae9f746a2cca8764a440006b68" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a440a398a39fccfb6bc529d1c5238962" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ea9073c5f186702d27103febbc03b2db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cd6000f8526794813876bee9d86854a2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "2dcdc81c2dc2c48166f0f312265177b9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3e0eada4d26c11bf3ecc02c90371a904" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f610ece4bebf2bda87662f1646b748c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "f923c1c3c2a9915f5ae3fc13f2f74d81" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "174ca52dbbff01c7ec2c9b1a251447cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ae39caa4431c35187ab3dfc0e60c328c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08e177080ce65b8e4ada20b0ff6340be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "491c25bdf1f29a3ba8cc469171e9b03a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ba51df0a15d17f72950c3f8508ff6d4f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee158e48976b0a4f752ea621af2eefe8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "2e678b14a9b535566e914ce283aed8c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "630bf903d2b3fc6792fcd06a0fa8a023" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cf1807b062ead015ab356cb5d10162fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fddb099a7a603f5587aaa0f1e4790bf0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 16777216 |
|
} |
|
], |
|
"md5sum": "96b454d533e8e46e1e54056186bedf15" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a53f4c8cb46520a53c480c2e4fc4422a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a407472cabc23f09e6e7a18e71ee4f7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00a87753b6dccedf6474bc210fd683f6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 4194304 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 4210688 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 15745024 |
|
} |
|
], |
|
"md5sum": "ba44f9f5726fc5788335535666d85536" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "062bc39233f4b393902539914ddd7bc1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6319e6fe2d4125ef4ae4f4710900f957" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11550720 |
|
} |
|
], |
|
"md5sum": "2267d4b075b04461331af478345a3dea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bafe82192c047fd83aa610ff553e69f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a3183e9a24b5023975a66566bafd1a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
} |
|
], |
|
"md5sum": "b0e1e37032c26cae6fb5a4e90104b984" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a26e02288d9f92793fb54db5686dd073" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b29a2dc235c83c0f6a2c5bb1790bed3d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "1016bf9c830979eb5669cfc1f5f127c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "352b92942fa213287d31bc1862a93db6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "02b433bce83998d618651e83b25798fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "763ee5cc10d9a37460f28283e3ba9cbf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 28311552 |
|
} |
|
], |
|
"md5sum": "7eb56b013d3fdd90f11add34c1b8d2b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c0c8021a97ddd046df310f9bb7d8288c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "10096f76d7daca95fc166815ad11cc0a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "64ac37c99647869c9e1172e3eb66416d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "470f0656a1c4844e6c129d0dbb964646" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ab87bec28f54e7d0d1e20575384b602" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78648b6d6155ad08fb1fdf4283aa297e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28311552, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
} |
|
], |
|
"md5sum": "15f23ef4f58a92efa7b004a76135e3cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "55a5919c45aad0a3e596a7ff042267a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f829762b8fde5110f5752e3fe7d0130" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7aa51b0926aa39f81bcf6914c42a3b62" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "9f496910a366614141673d03c9dd2123" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c1166c80c716cf5641ca55fc580d36f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ef361e0c26558e693dd0e685d1ee44d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
} |
|
], |
|
"md5sum": "37fafbfbd22d0e594fe30b20df7aee22" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "233989fcc58cb1859669fecc9ff30b76" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a01d3a559901adfc0b0be9edd3a1a912" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "a01687654d114f164f52d4a3d4455881" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8912ab6c9e7350b5d04b7b3ef616a059" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4b3fe93a9b35a0d71991ee02d660129f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "561b943391c70eddbd4609c269e46ec0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 28311552 |
|
} |
|
], |
|
"md5sum": "13a29c66235bf9e269b567f9709643e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bedf8a29f0721e3ff9cd6177a2de886c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d3c1813501e3470778ed6134bf11b4a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "8a7e46eda9b7c0f50579fbf93c399b19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd16d8ab1df70f9cc620b8006fccc1e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8f86312eccd96a065854a168d311b245" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bed53826334c452fad86027a2931b44f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28311552, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
} |
|
], |
|
"md5sum": "8672850bd802f6fd85a32098250c352c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c8db04554ed307105c602f2b271e8a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3a91389aa1454d55dda240ef2c173f47" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "82ef8d516e30cce3f970671ca1bec8c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "bf191b63697d6b3083dbb46968716d69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "85d7710d9b5942418510e8e8f73ef832" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc0ac1879d0e7a85df237b0670686edf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
} |
|
], |
|
"md5sum": "825a0f1ce346e132789486bc7cb7caca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c6b546bdf400890f12f4935854083b45" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad162ab66314c082216fc64b7a2c05cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "57efd0f4890720b8213461cb7f8a0170" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b102d4399c024d170e85eeb1c2e1e7aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0a566b2b7b6e50182ee48c2a90486454" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f52fad0ae62ba3ebb09d167fe36cabf2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 28311552 |
|
} |
|
], |
|
"md5sum": "9b7f4b8b317951ab17672f451e837413" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "30a3f1200ab76bb1c2f7ebfab14e2b1f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "62b86d325dc23bec391aeb05ff401b86" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "5abf9fa19d23c4b07f056c00cf6aac18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a30c4cd40986f9bd34d0cfdd9cf744bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5f07898d671ae8a24a05e70fc93ad3c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4e914448c2897af9d820f219e27c8e94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28311552, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
} |
|
], |
|
"md5sum": "d594712b5570cb29e643ac8d7da02417" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "98d4348e76939438cc920909874596e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a650003d99f8fe513eae3744fdc3f47" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a0ea757d8c73c6e6d7acd2c0426f62b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "d9a7f585878eacce5f201612bafa3ef6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "79d35abeb80622d8cb280d0e444d56e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6eb9dab516a6f8112295ab675ecb341c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
} |
|
], |
|
"md5sum": "a7b56a8dc7197c45c6d214c1c0329fa2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f4f80aa9e2358907d5a36165d5f69130" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "385c7cca884f9ead93cec68cac786429" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "edd6b2bc08e6049c2ae0bcbe666e7a15" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "65a934861ea4c69caaebbba00182032c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c7f0daf6eccdac9ba3af45a3cdc2feb7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "916de262e52897375e11be94e5760ba8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 28311552 |
|
} |
|
], |
|
"md5sum": "c70e20ddf27e185d3907d9d3d92bb5b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5e4f51bc86e941200c808b780a29fc3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "90ba3688af96b70188affbe87325817a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "2b80ae9a04dcffebbc2b3b1d92146505" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58f6a1143983d49eac34ffdd952fd613" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ddd429d783e9b8579932772851c907c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1942776c8c0dab7f9c3027e3943298b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28311552, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
} |
|
], |
|
"md5sum": "323a887ec171ce72b95c96611d5464da" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b6e3738c8f8d8baa95c88f389d2dbb9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ea6a6e2ce094336d16dd9ea07939b7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9edd49bb47d8b7e7ca926f8435999f4e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "4980ac8e8266e5d381944db73c0305b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c7409a377e5ee4f659f9bfabd209e18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1dfb4cb0c3ce248d65c75047e4c2d0b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
} |
|
], |
|
"md5sum": "f4feac9dcc6e53d980bca05ccf661a61" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d29c60d5faaa8bef98a44bc3ce451bc1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "21a1874290c3f3860773326c631d3bb4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "d0344fb95ceca1890aa71a945a0b3194" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f60b3f6a95f75abd43951d48ce007f88" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "14030af2882cd70601690811867a8042" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "250384cee7b34d8a1b59407e2d552a69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 28311552 |
|
} |
|
], |
|
"md5sum": "8c815231b227f92b4d9a5113e7cb981d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d570a3123feb214fa7489a6ecb179d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ced6f37b0b5fd730016a7227d3100270" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "b6ca95c5ac16a41d05697ba6dfc56fb5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d1ea5c60acd533f8e46b591087e8eef5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ab5e725e583a76b03849a1dda254e613" |
|
}, |
|
{ |
|
"dataPath": "params_shard_148.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a494a30b8ddfcf1a9a6278f64b573d1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_149.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28311552, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
} |
|
], |
|
"md5sum": "4bce08f6e2ed28357955a324ce2feec9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_150.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e05f01861de1248d44f182229a1215a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_151.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8705c0f10d39579b358d32370a823b9b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_152.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "51b5ff4d83d971c3361238c94607069f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_153.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "e93fc5a370fbee2beafc33173c2e2206" |
|
}, |
|
{ |
|
"dataPath": "params_shard_154.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08a4e1432bf406cf005b451a5674cdc2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_155.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "01649d01d47e2e04b8a4fa8bae8ef674" |
|
}, |
|
{ |
|
"dataPath": "params_shard_156.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
} |
|
], |
|
"md5sum": "f5f64bf567551b9e2e3912be3b4c81bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_157.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8bd169b00ae7def01c1b322c716cb22f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_158.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c0ec7e56d0728db56e158817a45f53b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_159.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "5623df5e01446c861eed9368a96cfe4b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_160.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6557beb113740a47bb0e6f87b8c8b3a3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_161.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b447a9f2bdf813740ce910ef65cb51ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_162.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3e4ad4b17cb80cc056c746663516dfc2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_163.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 28311552 |
|
} |
|
], |
|
"md5sum": "4db448f55aef5079bb3b972dc1eda472" |
|
}, |
|
{ |
|
"dataPath": "params_shard_164.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "904a623a956706aa0fa5af76bd29ced6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_165.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8544ec70973bd870422cca06d7e4322b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_166.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "3612c1415cfb5711e4a5cf4bb05b7215" |
|
}, |
|
{ |
|
"dataPath": "params_shard_167.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e6dfd82bcf8854b5763e6df6e722b1ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_168.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "19959f8c41c7bc1bd734162f7962b800" |
|
}, |
|
{ |
|
"dataPath": "params_shard_169.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8ee30a2e8c0ea1dd759ab9a1f6647579" |
|
}, |
|
{ |
|
"dataPath": "params_shard_170.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28311552, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
} |
|
], |
|
"md5sum": "d55b8cca2b07feaebeeb39efeb882898" |
|
}, |
|
{ |
|
"dataPath": "params_shard_171.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "decf42439c00a38540b0b78497235394" |
|
}, |
|
{ |
|
"dataPath": "params_shard_172.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "256182df76b0dd991065bbcc5eddba20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_173.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc7b84655d1a84f7cccecd31519b5548" |
|
}, |
|
{ |
|
"dataPath": "params_shard_174.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "175a32c435087d57f73c514433c433a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_175.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7969b6bd1627307c2c999df526c43497" |
|
}, |
|
{ |
|
"dataPath": "params_shard_176.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "270054b5b37e1b11f92b50796b1e8a2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_177.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
} |
|
], |
|
"md5sum": "20e9bafc6f40df87415ed5d1da4c9ca8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_178.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "65b3b9a0b90e420ffdacfe28d3a2e512" |
|
}, |
|
{ |
|
"dataPath": "params_shard_179.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bd276f02a30e0b0ba3cef20bc120aba8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_180.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "8e16dc11466528b9070f0aefef610ddb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_181.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ff7d50f1d9fdfaaa921ece84e4b60d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_182.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "da602082ad3cb81a6fb6d448deea8a2a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_183.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d7f4404958ebc4030269b7c722df1588" |
|
}, |
|
{ |
|
"dataPath": "params_shard_184.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 28311552 |
|
} |
|
], |
|
"md5sum": "dec5ccccc71d25e094ed9b4690c2ae94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_185.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a561eb4820f06e6867820e7f160a8bea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_186.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f46a83a50b5d528f676ded352221a06" |
|
}, |
|
{ |
|
"dataPath": "params_shard_187.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "e9b81e08ce91a5fe4454410903ccd0a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_188.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a90216d5c51ae158f14f0754e3389d8f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_189.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aee0323275395b86ab3239747354d600" |
|
}, |
|
{ |
|
"dataPath": "params_shard_190.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f8a8e0fcacba6f035b18427a82acf470" |
|
}, |
|
{ |
|
"dataPath": "params_shard_191.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28311552, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
} |
|
], |
|
"md5sum": "3aa3ca8b8247561f4c9cfc797fe93ff7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_192.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "38bdcc512a19fc28f1416f09debdf7e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_193.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ab0f421dc2298eb0b2d2abb43139f2e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_194.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b31ee355cb774201550ebc1ff303c79e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_195.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "a9c299554fb451b9b1277cca0ca13e51" |
|
}, |
|
{ |
|
"dataPath": "params_shard_196.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "67da15ff0c92dce1cdb4d95eb503fac7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_197.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "266bf35871c1889697b2d314bcd53597" |
|
}, |
|
{ |
|
"dataPath": "params_shard_198.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
} |
|
], |
|
"md5sum": "4889baf39475cfb32a93512c397f99f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_199.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "13d6d7ea523f6b47520579f6c85b0575" |
|
}, |
|
{ |
|
"dataPath": "params_shard_200.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e83a86ca989e63cae0d463f05888a8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_201.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "b293c034bc8aa088b7fce32c4578882f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_202.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "945e19d74838c0d4c700e83029e71e71" |
|
}, |
|
{ |
|
"dataPath": "params_shard_203.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "56c46b19341bac2fc72b995657e757fd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_204.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7dd0d96ec1815a55ed2c587577a59774" |
|
}, |
|
{ |
|
"dataPath": "params_shard_205.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
}, |
|
{ |
|
"name": "model.layers.32.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 28311552 |
|
} |
|
], |
|
"md5sum": "9d84edab70c0ca473c0d4e08c0e24048" |
|
}, |
|
{ |
|
"dataPath": "params_shard_206.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1ef298a7dae5fb550148b98687d3585" |
|
}, |
|
{ |
|
"dataPath": "params_shard_207.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c4e3befcf15fcdbfa2f45a803bc6dae3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_208.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.32.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "08bc2517b73a1ec3aff7722d1f8e64cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_209.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "46eb968739cef3f8349e6eebe45717cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_210.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00252f4ce1d302b5d184a804077d8ace" |
|
}, |
|
{ |
|
"dataPath": "params_shard_211.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be2e70ae1191e2e5b1d987b51b756094" |
|
}, |
|
{ |
|
"dataPath": "params_shard_212.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28311552, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
} |
|
], |
|
"md5sum": "571476198667e3c2ed12e6a14dbea23f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_213.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb7cbe407ab525852f6e49caf658ebb3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_214.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fbc57d00faa34815fd241d3c708b61ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_215.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "af68e5967ae6fa81d9a4e6b42bdc68c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_216.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.33.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "73bab6601fad81d91a11848a9032b208" |
|
}, |
|
{ |
|
"dataPath": "params_shard_217.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c00efba3d4f9afb991d04bdbdc3d73d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_218.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "60ba21254300f2ec7123ff7c47e9a870" |
|
}, |
|
{ |
|
"dataPath": "params_shard_219.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.34.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
} |
|
], |
|
"md5sum": "cbf8875735c9b1cb6660c9812b23f59b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_220.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a6923835ba69dc280e8511e2d683eea2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_221.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9407c6e4501f3cdeed0be81c1ea6064" |
|
}, |
|
{ |
|
"dataPath": "params_shard_222.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "ef2ccb684058fe655abd337a6bfab9d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_223.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a8d36e55399e732e826f11ae4ec5e851" |
|
}, |
|
{ |
|
"dataPath": "params_shard_224.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "563be7914aa22c8be3c246a43c0029ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_225.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f82d59e84cbca683b76c54b6fa0a98c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_226.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
}, |
|
{ |
|
"name": "model.layers.35.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 28311552 |
|
} |
|
], |
|
"md5sum": "3df8e982ffcefdfaeb9cfb038a799134" |
|
}, |
|
{ |
|
"dataPath": "params_shard_227.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ab22de5551e6af2df2315cf6d0d1167" |
|
}, |
|
{ |
|
"dataPath": "params_shard_228.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a8c232a082f55b472dbcb12b7e493171" |
|
}, |
|
{ |
|
"dataPath": "params_shard_229.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.35.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "4b8e46cc732669bdc6061632ea9ef405" |
|
}, |
|
{ |
|
"dataPath": "params_shard_230.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "63da5e5c89b84bc8f58465f62b6f32a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_231.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5026084039b948a7e22d24908a4f6dbf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_232.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c9214df1271b9cd306085cd3494b973e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_233.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28311552, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
} |
|
], |
|
"md5sum": "35bbaabe89bac88dadb9652af8da4487" |
|
}, |
|
{ |
|
"dataPath": "params_shard_234.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f6414641142cd0cac7441c4e731b0c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_235.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f967fabdee8ec14f4cebd583498b8c40" |
|
}, |
|
{ |
|
"dataPath": "params_shard_236.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa00972926007b4df90826e8d9f3ef1e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_237.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.36.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "17031ac9726479211013d22e99a4b7bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_238.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "09f696fc0d957db534a1d0c6d24684cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_239.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66afc062efcf65b868f713cf2627cadb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_240.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.36.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.37.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
} |
|
], |
|
"md5sum": "857f114b60b3b8f20441154d601f884a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_241.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3fa66bbbf904e58e596908ffa6589873" |
|
}, |
|
{ |
|
"dataPath": "params_shard_242.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f6b04af9d306428aab8676d0e740ac7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_243.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.37.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "324c66d48e6d100ba90f19765c4cdd3d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_244.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d9fa4efd204707827e1fd29ac68b39b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_245.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f162529c939d1850a8df4d2a2d23f8f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_246.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b84bd3d28733275b88fb2130ba0853ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_247.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
}, |
|
{ |
|
"name": "model.layers.38.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 28311552 |
|
} |
|
], |
|
"md5sum": "68f75c56c6fd6d34e64e7b1e0f72212e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_248.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "124ad70e3288ebca68279329f73861a0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_249.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9c342a4d8fdf2ab4f00efb2d84a5b727" |
|
}, |
|
{ |
|
"dataPath": "params_shard_250.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.38.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "906596a1f2c23f7a8792d1a515bd42b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_251.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "51f375558eba2a1947cc30047748c5df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_252.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1005631fa756c00cc8f3a029bff7775b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_253.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c768dd345e9f0542f238afb6a626fc0b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_254.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28311552, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 24117248 |
|
} |
|
], |
|
"md5sum": "089757ed552e9b43f0b839cd1b08b022" |
|
}, |
|
{ |
|
"dataPath": "params_shard_255.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ffdf8517fe9c8f58458f344a1438e29" |
|
}, |
|
{ |
|
"dataPath": "params_shard_256.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e7267130507f36cb7af083469bacaea8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_257.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8fda46b6e82fe362b06f2edde57ae524" |
|
}, |
|
{ |
|
"dataPath": "params_shard_258.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.39.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "f9c30ce50cfde57ce994555b8fed164a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_259.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "008e2a8740e6145639b9ef0c702df4ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_260.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "75081ae45c2d2b15d25ad519248a3163" |
|
}, |
|
{ |
|
"dataPath": "params_shard_261.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23101440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.39.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23068672 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23085056 |
|
} |
|
], |
|
"md5sum": "711cdccad9028f6975f529dac477b404" |
|
}, |
|
{ |
|
"dataPath": "params_shard_262.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a561a7f21d679d224a2e8de6f93f62e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_263.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a268b53595edc8915bd355593bdd1e65" |
|
}, |
|
{ |
|
"dataPath": "params_shard_264.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "287969efcf204344fcd9a15526eafeb3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_265.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "252163a4d71154a4f05fed13aee90ae8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_266.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4426a827bdb98e0ec16582f638294ad6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_267.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23085056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11550720 |
|
} |
|
], |
|
"md5sum": "84045f863aec7fd1f43750cb26732bee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_268.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5880ade62e34d7cce4793e1f345121be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_269.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2fc8759b704bcf9eb7d038684d9e7013" |
|
}, |
|
{ |
|
"dataPath": "params_shard_270.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "023c7d162f6c54dd163eeaec98f7abed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_271.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ec521b2b9feb69800be41703b1087b2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_272.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2816 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f60a461828d2977bbe620437c79d50d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_273.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dbea397fbfeb694cc148d1096c97294f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_274.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28327936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
704 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 16793600 |
|
} |
|
], |
|
"md5sum": "0d5349c354925f5b442513eb16724417" |
|
}, |
|
{ |
|
"dataPath": "params_shard_275.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.up_proj.q_weight", |
|
"shape": [ |
|
22528, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb92de0a6cd0e564b14c781e4e8491e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_276.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "473062a2cb2f45e845278b58c8b1ac0e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_277.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.up_proj.q_scale", |
|
"shape": [ |
|
22528, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 11534336 |
|
} |
|
], |
|
"md5sum": "ca862c3759a36ba30e529635383d4c13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_278.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89b53ca5f34ff792d316d7036bd5f486" |
|
}, |
|
{ |
|
"dataPath": "params_shard_279.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
24576, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bfb0bf807caae04ed49b831e1d8979cf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_280.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.out_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "20ff683eb65ece3b8b00204e0fc9a7a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_281.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
24576, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 16777216 |
|
} |
|
], |
|
"md5sum": "1498527f4562fd9d2dd9691346bb4597" |
|
}, |
|
{ |
|
"dataPath": "params_shard_282.bin", |
|
"format": "raw-shard", |
|
"nbytes": 4194304, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.out_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f8149cf91f163e76b17129dbbd6f3aa4" |
|
} |
|
] |
|
} |