Command-R-q4f16_1-MLC / ndarray-cache.json
TNT3530's picture
Upload folder using huggingface_hub
cc34e7a verified
{
"metadata": {
"ParamSize": 443,
"ParamBytes": 19677200384.0,
"BitsPerParam": 4.500110418416715
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1048576000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
256000,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576000,
"byteOffset": 0
}
],
"md5sum": "90172f952317e27f8f3eadb8a833b298"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 131072000,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
256000,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072000,
"byteOffset": 0
}
],
"md5sum": "a15f399df006c9452a99a41845042bca"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5f738e519bc173b908b7cd18431c29e8"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d32a88c4601b719449740f9e4e0ec941"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "fa29ce568dff5f9093b97edb9a4e42ee"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.0.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "311ea3067bb11bd40f2fb3df606135b3"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "0bf412fdfd72cfe1bbe6c3e9babc5fe5"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.0.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "daaa92d0e8d1c22da6c49bca7992bcdd"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "52ed1c455e6231bb1d81efc8ea1ccd20"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.0.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
}
],
"md5sum": "74bab5694c593c28b3305c6f25768503"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.1.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "32b73b10039081fcda2e8f03ead6841c"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.1.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "4af8ca8a3cfa630d59f3f21bbf769681"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "eeedbe99ab0d2825106928c56b3a36e3"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "2cec07be2d0ca19654e2d98fa7eca397"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "66a5b3602033eb9a585e85c9c9430520"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "c45f3c013b5fa9bc1bd2dca375ac79f3"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.1.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.1.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28311552
}
],
"md5sum": "6df0a68f5dc645329ec73c6f53dbec14"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.2.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "c48b9cbad61ecfde89049ccb16ceeaca"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.2.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "df885a3f12a59d21a57b271cd425450e"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "8a43afbc46b4f47643b80ec2f270aea9"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a89395d538137c2626af121c3e309327"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c0225bde45c596864953614f93c7c4b1"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "08b2f4ac06f8ae1ec6f75acf158bb21f"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.2.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.2.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
}
],
"md5sum": "78cd23ae9f746a2cca8764a440006b68"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a440a398a39fccfb6bc529d1c5238962"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "ea9073c5f186702d27103febbc03b2db"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.10.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "cd6000f8526794813876bee9d86854a2"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "2dcdc81c2dc2c48166f0f312265177b9"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.10.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "3e0eada4d26c11bf3ecc02c90371a904"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f610ece4bebf2bda87662f1646b748c2"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.10.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "f923c1c3c2a9915f5ae3fc13f2f74d81"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "174ca52dbbff01c7ec2c9b1a251447cd"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "ae39caa4431c35187ab3dfc0e60c328c"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.11.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "08e177080ce65b8e4ada20b0ff6340be"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "491c25bdf1f29a3ba8cc469171e9b03a"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.11.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "ba51df0a15d17f72950c3f8508ff6d4f"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ee158e48976b0a4f752ea621af2eefe8"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.11.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "2e678b14a9b535566e914ce283aed8c5"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "630bf903d2b3fc6792fcd06a0fa8a023"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "cf1807b062ead015ab356cb5d10162fe"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fddb099a7a603f5587aaa0f1e4790bf0"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16777216
}
],
"md5sum": "96b454d533e8e46e1e54056186bedf15"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "a53f4c8cb46520a53c480c2e4fc4422a"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.9.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "a407472cabc23f09e6e7a18e71ee4f7b"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.9.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "00a87753b6dccedf6474bc210fd683f6"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.12.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 4210688
},
{
"name": "model.layers.9.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 15745024
}
],
"md5sum": "ba44f9f5726fc5788335535666d85536"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "062bc39233f4b393902539914ddd7bc1"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.12.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "6319e6fe2d4125ef4ae4f4710900f957"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.9.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11534336
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11550720
}
],
"md5sum": "2267d4b075b04461331af478345a3dea"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.12.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "bafe82192c047fd83aa610ff553e69f2"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "6a3183e9a24b5023975a66566bafd1a5"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.12.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
}
],
"md5sum": "b0e1e37032c26cae6fb5a4e90104b984"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.13.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "a26e02288d9f92793fb54db5686dd073"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.13.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "b29a2dc235c83c0f6a2c5bb1790bed3d"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "1016bf9c830979eb5669cfc1f5f127c7"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "352b92942fa213287d31bc1862a93db6"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "02b433bce83998d618651e83b25798fa"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "763ee5cc10d9a37460f28283e3ba9cbf"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.13.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.13.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28311552
}
],
"md5sum": "7eb56b013d3fdd90f11add34c1b8d2b7"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.14.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "c0c8021a97ddd046df310f9bb7d8288c"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.14.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "10096f76d7daca95fc166815ad11cc0a"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "64ac37c99647869c9e1172e3eb66416d"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "470f0656a1c4844e6c129d0dbb964646"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1ab87bec28f54e7d0d1e20575384b602"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "78648b6d6155ad08fb1fdf4283aa297e"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.14.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.14.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
}
],
"md5sum": "15f23ef4f58a92efa7b004a76135e3cc"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "55a5919c45aad0a3e596a7ff042267a8"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "0f829762b8fde5110f5752e3fe7d0130"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.15.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "7aa51b0926aa39f81bcf6914c42a3b62"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "9f496910a366614141673d03c9dd2123"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.15.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "7c1166c80c716cf5641ca55fc580d36f"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "6ef361e0c26558e693dd0e685d1ee44d"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.15.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
}
],
"md5sum": "37fafbfbd22d0e594fe30b20df7aee22"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.16.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "233989fcc58cb1859669fecc9ff30b76"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.16.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "a01d3a559901adfc0b0be9edd3a1a912"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "a01687654d114f164f52d4a3d4455881"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8912ab6c9e7350b5d04b7b3ef616a059"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4b3fe93a9b35a0d71991ee02d660129f"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "561b943391c70eddbd4609c269e46ec0"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.16.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.16.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28311552
}
],
"md5sum": "13a29c66235bf9e269b567f9709643e5"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.17.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "bedf8a29f0721e3ff9cd6177a2de886c"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.17.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "d3c1813501e3470778ed6134bf11b4a4"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "8a7e46eda9b7c0f50579fbf93c399b19"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "dd16d8ab1df70f9cc620b8006fccc1e7"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8f86312eccd96a065854a168d311b245"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "bed53826334c452fad86027a2931b44f"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.17.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.17.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
}
],
"md5sum": "8672850bd802f6fd85a32098250c352c"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8c8db04554ed307105c602f2b271e8a7"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "3a91389aa1454d55dda240ef2c173f47"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.18.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "82ef8d516e30cce3f970671ca1bec8c6"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "bf191b63697d6b3083dbb46968716d69"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.18.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "85d7710d9b5942418510e8e8f73ef832"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "cc0ac1879d0e7a85df237b0670686edf"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.18.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
}
],
"md5sum": "825a0f1ce346e132789486bc7cb7caca"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.19.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "c6b546bdf400890f12f4935854083b45"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.19.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "ad162ab66314c082216fc64b7a2c05cd"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "57efd0f4890720b8213461cb7f8a0170"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b102d4399c024d170e85eeb1c2e1e7aa"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0a566b2b7b6e50182ee48c2a90486454"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "f52fad0ae62ba3ebb09d167fe36cabf2"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.19.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.19.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28311552
}
],
"md5sum": "9b7f4b8b317951ab17672f451e837413"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.20.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "30a3f1200ab76bb1c2f7ebfab14e2b1f"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.20.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "62b86d325dc23bec391aeb05ff401b86"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "5abf9fa19d23c4b07f056c00cf6aac18"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a30c4cd40986f9bd34d0cfdd9cf744bf"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5f07898d671ae8a24a05e70fc93ad3c9"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "4e914448c2897af9d820f219e27c8e94"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.20.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.20.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
}
],
"md5sum": "d594712b5570cb29e643ac8d7da02417"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "98d4348e76939438cc920909874596e9"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "8a650003d99f8fe513eae3744fdc3f47"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.21.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "8a0ea757d8c73c6e6d7acd2c0426f62b"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "d9a7f585878eacce5f201612bafa3ef6"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.21.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "79d35abeb80622d8cb280d0e444d56e7"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "6eb9dab516a6f8112295ab675ecb341c"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.21.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
}
],
"md5sum": "a7b56a8dc7197c45c6d214c1c0329fa2"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.22.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "f4f80aa9e2358907d5a36165d5f69130"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.22.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "385c7cca884f9ead93cec68cac786429"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "edd6b2bc08e6049c2ae0bcbe666e7a15"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "65a934861ea4c69caaebbba00182032c"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c7f0daf6eccdac9ba3af45a3cdc2feb7"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "916de262e52897375e11be94e5760ba8"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.22.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.22.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28311552
}
],
"md5sum": "c70e20ddf27e185d3907d9d3d92bb5b1"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.23.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "5e4f51bc86e941200c808b780a29fc3e"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.23.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "90ba3688af96b70188affbe87325817a"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "2b80ae9a04dcffebbc2b3b1d92146505"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "58f6a1143983d49eac34ffdd952fd613"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ddd429d783e9b8579932772851c907c3"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "1942776c8c0dab7f9c3027e3943298b8"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.23.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.23.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
}
],
"md5sum": "323a887ec171ce72b95c96611d5464da"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.24.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0b6e3738c8f8d8baa95c88f389d2dbb9"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "4ea6a6e2ce094336d16dd9ea07939b7b"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.24.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "9edd49bb47d8b7e7ca926f8435999f4e"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "4980ac8e8266e5d381944db73c0305b5"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.24.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "4c7409a377e5ee4f659f9bfabd209e18"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "1dfb4cb0c3ce248d65c75047e4c2d0b0"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.24.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
}
],
"md5sum": "f4feac9dcc6e53d980bca05ccf661a61"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.25.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "d29c60d5faaa8bef98a44bc3ce451bc1"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.25.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "21a1874290c3f3860773326c631d3bb4"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "d0344fb95ceca1890aa71a945a0b3194"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f60b3f6a95f75abd43951d48ce007f88"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.25.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "14030af2882cd70601690811867a8042"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "250384cee7b34d8a1b59407e2d552a69"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.25.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.25.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28311552
}
],
"md5sum": "8c815231b227f92b4d9a5113e7cb981d"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.26.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "6d570a3123feb214fa7489a6ecb179d5"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.26.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "ced6f37b0b5fd730016a7227d3100270"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "b6ca95c5ac16a41d05697ba6dfc56fb5"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d1ea5c60acd533f8e46b591087e8eef5"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.26.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ab5e725e583a76b03849a1dda254e613"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a494a30b8ddfcf1a9a6278f64b573d1a"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.26.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.26.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
}
],
"md5sum": "4bce08f6e2ed28357955a324ce2feec9"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.27.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e05f01861de1248d44f182229a1215a6"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "8705c0f10d39579b358d32370a823b9b"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.27.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "51b5ff4d83d971c3361238c94607069f"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "e93fc5a370fbee2beafc33173c2e2206"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.27.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "08a4e1432bf406cf005b451a5674cdc2"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "01649d01d47e2e04b8a4fa8bae8ef674"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.27.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
}
],
"md5sum": "f5f64bf567551b9e2e3912be3b4c81bc"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.28.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "8bd169b00ae7def01c1b322c716cb22f"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.28.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "c0ec7e56d0728db56e158817a45f53b1"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.28.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "5623df5e01446c861eed9368a96cfe4b"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "6557beb113740a47bb0e6f87b8c8b3a3"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.28.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b447a9f2bdf813740ce910ef65cb51ab"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "3e4ad4b17cb80cc056c746663516dfc2"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.28.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.28.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28311552
}
],
"md5sum": "4db448f55aef5079bb3b972dc1eda472"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.29.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "904a623a956706aa0fa5af76bd29ced6"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.29.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "8544ec70973bd870422cca06d7e4322b"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "3612c1415cfb5711e4a5cf4bb05b7215"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e6dfd82bcf8854b5763e6df6e722b1ab"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.29.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "19959f8c41c7bc1bd734162f7962b800"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8ee30a2e8c0ea1dd759ab9a1f6647579"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.29.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.29.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
}
],
"md5sum": "d55b8cca2b07feaebeeb39efeb882898"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.30.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "decf42439c00a38540b0b78497235394"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "256182df76b0dd991065bbcc5eddba20"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.3.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "cc7b84655d1a84f7cccecd31519b5548"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "175a32c435087d57f73c514433c433a8"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.3.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "7969b6bd1627307c2c999df526c43497"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "270054b5b37e1b11f92b50796b1e8a2f"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.3.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
}
],
"md5sum": "20e9bafc6f40df87415ed5d1da4c9ca8"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.4.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "65b3b9a0b90e420ffdacfe28d3a2e512"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.4.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "bd276f02a30e0b0ba3cef20bc120aba8"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "8e16dc11466528b9070f0aefef610ddb"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5ff7d50f1d9fdfaaa921ece84e4b60d2"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "da602082ad3cb81a6fb6d448deea8a2a"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "d7f4404958ebc4030269b7c722df1588"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.4.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.4.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28311552
}
],
"md5sum": "dec5ccccc71d25e094ed9b4690c2ae94"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.5.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "a561eb4820f06e6867820e7f160a8bea"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.5.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "1f46a83a50b5d528f676ded352221a06"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "e9b81e08ce91a5fe4454410903ccd0a4"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a90216d5c51ae158f14f0754e3389d8f"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "aee0323275395b86ab3239747354d600"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f8a8e0fcacba6f035b18427a82acf470"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.5.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.5.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
}
],
"md5sum": "3aa3ca8b8247561f4c9cfc797fe93ff7"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "38bdcc512a19fc28f1416f09debdf7e2"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "6ab0f421dc2298eb0b2d2abb43139f2e"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.30.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "b31ee355cb774201550ebc1ff303c79e"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "a9c299554fb451b9b1277cca0ca13e51"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.30.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "67da15ff0c92dce1cdb4d95eb503fac7"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "266bf35871c1889697b2d314bcd53597"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.30.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.30.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
}
],
"md5sum": "4889baf39475cfb32a93512c397f99f0"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.31.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "13d6d7ea523f6b47520579f6c85b0575"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.31.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "2e83a86ca989e63cae0d463f05888a8c"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.31.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "b293c034bc8aa088b7fce32c4578882f"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "945e19d74838c0d4c700e83029e71e71"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.31.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "56c46b19341bac2fc72b995657e757fd"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "7dd0d96ec1815a55ed2c587577a59774"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.31.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.31.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28311552
}
],
"md5sum": "9d84edab70c0ca473c0d4e08c0e24048"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.32.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "f1ef298a7dae5fb550148b98687d3585"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.32.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "c4e3befcf15fcdbfa2f45a803bc6dae3"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "08bc2517b73a1ec3aff7722d1f8e64cc"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "46eb968739cef3f8349e6eebe45717cd"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.32.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "00252f4ce1d302b5d184a804077d8ace"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "be2e70ae1191e2e5b1d987b51b756094"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.32.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.32.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
}
],
"md5sum": "571476198667e3c2ed12e6a14dbea23f"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.33.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "bb7cbe407ab525852f6e49caf658ebb3"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "fbc57d00faa34815fd241d3c708b61ad"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.33.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "af68e5967ae6fa81d9a4e6b42bdc68c2"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "73bab6601fad81d91a11848a9032b208"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.33.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "c00efba3d4f9afb991d04bdbdc3d73d3"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "60ba21254300f2ec7123ff7c47e9a870"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.33.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.33.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
}
],
"md5sum": "cbf8875735c9b1cb6660c9812b23f59b"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.34.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "a6923835ba69dc280e8511e2d683eea2"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.34.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "b9407c6e4501f3cdeed0be81c1ea6064"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "ef2ccb684058fe655abd337a6bfab9d0"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a8d36e55399e732e826f11ae4ec5e851"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.34.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "563be7914aa22c8be3c246a43c0029ef"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "1f82d59e84cbca683b76c54b6fa0a98c"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.34.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.34.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28311552
}
],
"md5sum": "3df8e982ffcefdfaeb9cfb038a799134"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.35.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "1ab22de5551e6af2df2315cf6d0d1167"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.35.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "a8c232a082f55b472dbcb12b7e493171"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.35.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "4b8e46cc732669bdc6061632ea9ef405"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "63da5e5c89b84bc8f58465f62b6f32a1"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.35.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5026084039b948a7e22d24908a4f6dbf"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "c9214df1271b9cd306085cd3494b973e"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.35.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.35.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
}
],
"md5sum": "35bbaabe89bac88dadb9652af8da4487"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.36.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9f6414641142cd0cac7441c4e731b0c9"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "f967fabdee8ec14f4cebd583498b8c40"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.36.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "aa00972926007b4df90826e8d9f3ef1e"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "17031ac9726479211013d22e99a4b7bf"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.36.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "09f696fc0d957db534a1d0c6d24684cc"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "66afc062efcf65b868f713cf2627cadb"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.36.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.36.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
}
],
"md5sum": "857f114b60b3b8f20441154d601f884a"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.37.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "3fa66bbbf904e58e596908ffa6589873"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.37.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "7f6b04af9d306428aab8676d0e740ac7"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.37.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "324c66d48e6d100ba90f19765c4cdd3d"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d9fa4efd204707827e1fd29ac68b39b7"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.37.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f162529c939d1850a8df4d2a2d23f8f0"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "b84bd3d28733275b88fb2130ba0853ad"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.37.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.37.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28311552
}
],
"md5sum": "68f75c56c6fd6d34e64e7b1e0f72212e"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.38.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "124ad70e3288ebca68279329f73861a0"
},
{
"dataPath": "params_shard_249.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.38.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "9c342a4d8fdf2ab4f00efb2d84a5b727"
},
{
"dataPath": "params_shard_250.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.38.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "906596a1f2c23f7a8792d1a515bd42b4"
},
{
"dataPath": "params_shard_251.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "51f375558eba2a1947cc30047748c5df"
},
{
"dataPath": "params_shard_252.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.38.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1005631fa756c00cc8f3a029bff7775b"
},
{
"dataPath": "params_shard_253.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "c768dd345e9f0542f238afb6a626fc0b"
},
{
"dataPath": "params_shard_254.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.38.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11534336
},
{
"name": "model.layers.38.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24117248
}
],
"md5sum": "089757ed552e9b43f0b839cd1b08b022"
},
{
"dataPath": "params_shard_255.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.39.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4ffdf8517fe9c8f58458f344a1438e29"
},
{
"dataPath": "params_shard_256.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "e7267130507f36cb7af083469bacaea8"
},
{
"dataPath": "params_shard_257.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.39.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "8fda46b6e82fe362b06f2edde57ae524"
},
{
"dataPath": "params_shard_258.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "f9c30ce50cfde57ce994555b8fed164a"
},
{
"dataPath": "params_shard_259.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.39.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "008e2a8740e6145639b9ef0c702df4ff"
},
{
"dataPath": "params_shard_260.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "75081ae45c2d2b15d25ad519248a3163"
},
{
"dataPath": "params_shard_261.bin",
"format": "raw-shard",
"nbytes": 23101440,
"records": [
{
"name": "model.layers.39.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.39.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
},
{
"name": "model.norm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23068672
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 23085056
}
],
"md5sum": "711cdccad9028f6975f529dac477b404"
},
{
"dataPath": "params_shard_262.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.6.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "a561a7f21d679d224a2e8de6f93f62e8"
},
{
"dataPath": "params_shard_263.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.6.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "a268b53595edc8915bd355593bdd1e65"
},
{
"dataPath": "params_shard_264.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "287969efcf204344fcd9a15526eafeb3"
},
{
"dataPath": "params_shard_265.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "252163a4d71154a4f05fed13aee90ae8"
},
{
"dataPath": "params_shard_266.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.7.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "4426a827bdb98e0ec16582f638294ad6"
},
{
"dataPath": "params_shard_267.bin",
"format": "raw-shard",
"nbytes": 23085056,
"records": [
{
"name": "model.layers.6.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11534336
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11550720
}
],
"md5sum": "84045f863aec7fd1f43750cb26732bee"
},
{
"dataPath": "params_shard_268.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.7.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "5880ade62e34d7cce4793e1f345121be"
},
{
"dataPath": "params_shard_269.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "2fc8759b704bcf9eb7d038684d9e7013"
},
{
"dataPath": "params_shard_270.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.7.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "023c7d162f6c54dd163eeaec98f7abed"
},
{
"dataPath": "params_shard_271.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1ec521b2b9feb69800be41703b1087b2"
},
{
"dataPath": "params_shard_272.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
8192,
2816
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "f60a461828d2977bbe620437c79d50d3"
},
{
"dataPath": "params_shard_273.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.8.mlp.gate_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "dbea397fbfeb694cc148d1096c97294f"
},
{
"dataPath": "params_shard_274.bin",
"format": "raw-shard",
"nbytes": 28327936,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16777216
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
8192,
704
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 16793600
}
],
"md5sum": "0d5349c354925f5b442513eb16724417"
},
{
"dataPath": "params_shard_275.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.8.mlp.up_proj.q_weight",
"shape": [
22528,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "bb92de0a6cd0e564b14c781e4e8491e4"
},
{
"dataPath": "params_shard_276.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "473062a2cb2f45e845278b58c8b1ac0e"
},
{
"dataPath": "params_shard_277.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.8.mlp.gate_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.up_proj.q_scale",
"shape": [
22528,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 11534336
}
],
"md5sum": "ca862c3759a36ba30e529635383d4c13"
},
{
"dataPath": "params_shard_278.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "89b53ca5f34ff792d316d7036bd5f486"
},
{
"dataPath": "params_shard_279.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "bfb0bf807caae04ed49b831e1d8979cf"
},
{
"dataPath": "params_shard_280.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "20ff683eb65ece3b8b00204e0fc9a7a4"
},
{
"dataPath": "params_shard_281.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 12582912
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16777216
}
],
"md5sum": "1498527f4562fd9d2dd9691346bb4597"
},
{
"dataPath": "params_shard_282.bin",
"format": "raw-shard",
"nbytes": 4194304,
"records": [
{
"name": "model.layers.9.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
}
],
"md5sum": "f8149cf91f163e76b17129dbbd6f3aa4"
}
]
}