diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,7105 @@ +{ + "metadata": { + "ParamSize": 443, + "ParamBytes": 19677200384.0, + "BitsPerParam": 4.500110418416715 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1048576000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 256000, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1048576000, + "byteOffset": 0 + } + ], + "md5sum": "90172f952317e27f8f3eadb8a833b298" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 256000, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "a15f399df006c9452a99a41845042bca" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5f738e519bc173b908b7cd18431c29e8" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d32a88c4601b719449740f9e4e0ec941" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "fa29ce568dff5f9093b97edb9a4e42ee" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.0.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "311ea3067bb11bd40f2fb3df606135b3" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "0bf412fdfd72cfe1bbe6c3e9babc5fe5" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.0.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "daaa92d0e8d1c22da6c49bca7992bcdd" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "52ed1c455e6231bb1d81efc8ea1ccd20" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.0.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + } + ], + "md5sum": "74bab5694c593c28b3305c6f25768503" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.1.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "32b73b10039081fcda2e8f03ead6841c" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.1.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "4af8ca8a3cfa630d59f3f21bbf769681" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "eeedbe99ab0d2825106928c56b3a36e3" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2cec07be2d0ca19654e2d98fa7eca397" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "66a5b3602033eb9a585e85c9c9430520" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "c45f3c013b5fa9bc1bd2dca375ac79f3" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.1.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.1.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28311552 + } + ], + "md5sum": "6df0a68f5dc645329ec73c6f53dbec14" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.2.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "c48b9cbad61ecfde89049ccb16ceeaca" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.2.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "df885a3f12a59d21a57b271cd425450e" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "8a43afbc46b4f47643b80ec2f270aea9" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a89395d538137c2626af121c3e309327" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c0225bde45c596864953614f93c7c4b1" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "08b2f4ac06f8ae1ec6f75acf158bb21f" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.layers.2.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.2.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + } + ], + "md5sum": "78cd23ae9f746a2cca8764a440006b68" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a440a398a39fccfb6bc529d1c5238962" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "ea9073c5f186702d27103febbc03b2db" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.10.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "cd6000f8526794813876bee9d86854a2" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "2dcdc81c2dc2c48166f0f312265177b9" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.10.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "3e0eada4d26c11bf3ecc02c90371a904" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f610ece4bebf2bda87662f1646b748c2" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.10.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "f923c1c3c2a9915f5ae3fc13f2f74d81" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "174ca52dbbff01c7ec2c9b1a251447cd" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "ae39caa4431c35187ab3dfc0e60c328c" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.11.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "08e177080ce65b8e4ada20b0ff6340be" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "491c25bdf1f29a3ba8cc469171e9b03a" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.11.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "ba51df0a15d17f72950c3f8508ff6d4f" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ee158e48976b0a4f752ea621af2eefe8" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.11.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "2e678b14a9b535566e914ce283aed8c5" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "630bf903d2b3fc6792fcd06a0fa8a023" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "cf1807b062ead015ab356cb5d10162fe" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fddb099a7a603f5587aaa0f1e4790bf0" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 16777216 + } + ], + "md5sum": "96b454d533e8e46e1e54056186bedf15" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "a53f4c8cb46520a53c480c2e4fc4422a" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.9.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "a407472cabc23f09e6e7a18e71ee4f7b" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.9.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "00a87753b6dccedf6474bc210fd683f6" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.12.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 4210688 + }, + { + "name": "model.layers.9.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 15745024 + } + ], + "md5sum": "ba44f9f5726fc5788335535666d85536" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "062bc39233f4b393902539914ddd7bc1" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.12.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "6319e6fe2d4125ef4ae4f4710900f957" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.9.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11534336 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11550720 + } + ], + "md5sum": "2267d4b075b04461331af478345a3dea" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.12.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "bafe82192c047fd83aa610ff553e69f2" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "6a3183e9a24b5023975a66566bafd1a5" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.12.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + } + ], + "md5sum": "b0e1e37032c26cae6fb5a4e90104b984" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.13.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "a26e02288d9f92793fb54db5686dd073" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.13.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "b29a2dc235c83c0f6a2c5bb1790bed3d" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "1016bf9c830979eb5669cfc1f5f127c7" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "352b92942fa213287d31bc1862a93db6" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "02b433bce83998d618651e83b25798fa" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "763ee5cc10d9a37460f28283e3ba9cbf" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.13.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.13.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28311552 + } + ], + "md5sum": "7eb56b013d3fdd90f11add34c1b8d2b7" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.14.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "c0c8021a97ddd046df310f9bb7d8288c" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.14.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "10096f76d7daca95fc166815ad11cc0a" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "64ac37c99647869c9e1172e3eb66416d" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "470f0656a1c4844e6c129d0dbb964646" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1ab87bec28f54e7d0d1e20575384b602" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "78648b6d6155ad08fb1fdf4283aa297e" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.layers.14.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.14.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + } + ], + "md5sum": "15f23ef4f58a92efa7b004a76135e3cc" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "55a5919c45aad0a3e596a7ff042267a8" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "0f829762b8fde5110f5752e3fe7d0130" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.15.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "7aa51b0926aa39f81bcf6914c42a3b62" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "9f496910a366614141673d03c9dd2123" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.15.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "7c1166c80c716cf5641ca55fc580d36f" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "6ef361e0c26558e693dd0e685d1ee44d" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.15.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + } + ], + "md5sum": "37fafbfbd22d0e594fe30b20df7aee22" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.16.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "233989fcc58cb1859669fecc9ff30b76" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.16.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "a01d3a559901adfc0b0be9edd3a1a912" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "a01687654d114f164f52d4a3d4455881" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8912ab6c9e7350b5d04b7b3ef616a059" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4b3fe93a9b35a0d71991ee02d660129f" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "561b943391c70eddbd4609c269e46ec0" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.16.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.16.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28311552 + } + ], + "md5sum": "13a29c66235bf9e269b567f9709643e5" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.17.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "bedf8a29f0721e3ff9cd6177a2de886c" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.17.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "d3c1813501e3470778ed6134bf11b4a4" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "8a7e46eda9b7c0f50579fbf93c399b19" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "dd16d8ab1df70f9cc620b8006fccc1e7" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8f86312eccd96a065854a168d311b245" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "bed53826334c452fad86027a2931b44f" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.layers.17.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.17.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + } + ], + "md5sum": "8672850bd802f6fd85a32098250c352c" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8c8db04554ed307105c602f2b271e8a7" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "3a91389aa1454d55dda240ef2c173f47" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.18.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "82ef8d516e30cce3f970671ca1bec8c6" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "bf191b63697d6b3083dbb46968716d69" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.18.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "85d7710d9b5942418510e8e8f73ef832" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "cc0ac1879d0e7a85df237b0670686edf" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.18.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + } + ], + "md5sum": "825a0f1ce346e132789486bc7cb7caca" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.19.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "c6b546bdf400890f12f4935854083b45" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.19.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "ad162ab66314c082216fc64b7a2c05cd" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "57efd0f4890720b8213461cb7f8a0170" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b102d4399c024d170e85eeb1c2e1e7aa" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0a566b2b7b6e50182ee48c2a90486454" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "f52fad0ae62ba3ebb09d167fe36cabf2" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.19.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.19.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28311552 + } + ], + "md5sum": "9b7f4b8b317951ab17672f451e837413" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.20.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "30a3f1200ab76bb1c2f7ebfab14e2b1f" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.20.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "62b86d325dc23bec391aeb05ff401b86" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.20.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "5abf9fa19d23c4b07f056c00cf6aac18" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a30c4cd40986f9bd34d0cfdd9cf744bf" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5f07898d671ae8a24a05e70fc93ad3c9" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "4e914448c2897af9d820f219e27c8e94" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.layers.20.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.20.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + } + ], + "md5sum": "d594712b5570cb29e643ac8d7da02417" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "98d4348e76939438cc920909874596e9" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "8a650003d99f8fe513eae3744fdc3f47" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.21.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "8a0ea757d8c73c6e6d7acd2c0426f62b" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "d9a7f585878eacce5f201612bafa3ef6" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.21.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "79d35abeb80622d8cb280d0e444d56e7" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "6eb9dab516a6f8112295ab675ecb341c" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.21.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + } + ], + "md5sum": "a7b56a8dc7197c45c6d214c1c0329fa2" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.22.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "f4f80aa9e2358907d5a36165d5f69130" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.22.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "385c7cca884f9ead93cec68cac786429" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "edd6b2bc08e6049c2ae0bcbe666e7a15" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "65a934861ea4c69caaebbba00182032c" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c7f0daf6eccdac9ba3af45a3cdc2feb7" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "916de262e52897375e11be94e5760ba8" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.22.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.22.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28311552 + } + ], + "md5sum": "c70e20ddf27e185d3907d9d3d92bb5b1" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.23.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "5e4f51bc86e941200c808b780a29fc3e" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.23.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "90ba3688af96b70188affbe87325817a" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "2b80ae9a04dcffebbc2b3b1d92146505" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "58f6a1143983d49eac34ffdd952fd613" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ddd429d783e9b8579932772851c907c3" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "1942776c8c0dab7f9c3027e3943298b8" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.layers.23.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.23.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + } + ], + "md5sum": "323a887ec171ce72b95c96611d5464da" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0b6e3738c8f8d8baa95c88f389d2dbb9" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "4ea6a6e2ce094336d16dd9ea07939b7b" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.24.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "9edd49bb47d8b7e7ca926f8435999f4e" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "4980ac8e8266e5d381944db73c0305b5" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.24.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "4c7409a377e5ee4f659f9bfabd209e18" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "1dfb4cb0c3ce248d65c75047e4c2d0b0" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.24.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + } + ], + "md5sum": "f4feac9dcc6e53d980bca05ccf661a61" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.25.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "d29c60d5faaa8bef98a44bc3ce451bc1" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.25.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "21a1874290c3f3860773326c631d3bb4" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "d0344fb95ceca1890aa71a945a0b3194" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f60b3f6a95f75abd43951d48ce007f88" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "14030af2882cd70601690811867a8042" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "250384cee7b34d8a1b59407e2d552a69" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.25.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.25.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28311552 + } + ], + "md5sum": "8c815231b227f92b4d9a5113e7cb981d" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.26.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "6d570a3123feb214fa7489a6ecb179d5" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.26.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "ced6f37b0b5fd730016a7227d3100270" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "b6ca95c5ac16a41d05697ba6dfc56fb5" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d1ea5c60acd533f8e46b591087e8eef5" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ab5e725e583a76b03849a1dda254e613" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a494a30b8ddfcf1a9a6278f64b573d1a" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.layers.26.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.26.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + } + ], + "md5sum": "4bce08f6e2ed28357955a324ce2feec9" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e05f01861de1248d44f182229a1215a6" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "8705c0f10d39579b358d32370a823b9b" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.27.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "51b5ff4d83d971c3361238c94607069f" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "e93fc5a370fbee2beafc33173c2e2206" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.27.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "08a4e1432bf406cf005b451a5674cdc2" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "01649d01d47e2e04b8a4fa8bae8ef674" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.27.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + } + ], + "md5sum": "f5f64bf567551b9e2e3912be3b4c81bc" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.28.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "8bd169b00ae7def01c1b322c716cb22f" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.28.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "c0ec7e56d0728db56e158817a45f53b1" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.28.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "5623df5e01446c861eed9368a96cfe4b" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6557beb113740a47bb0e6f87b8c8b3a3" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b447a9f2bdf813740ce910ef65cb51ab" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "3e4ad4b17cb80cc056c746663516dfc2" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.28.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.28.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28311552 + } + ], + "md5sum": "4db448f55aef5079bb3b972dc1eda472" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.29.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "904a623a956706aa0fa5af76bd29ced6" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.29.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "8544ec70973bd870422cca06d7e4322b" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "3612c1415cfb5711e4a5cf4bb05b7215" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e6dfd82bcf8854b5763e6df6e722b1ab" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "19959f8c41c7bc1bd734162f7962b800" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8ee30a2e8c0ea1dd759ab9a1f6647579" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.layers.29.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.29.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + } + ], + "md5sum": "d55b8cca2b07feaebeeb39efeb882898" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "decf42439c00a38540b0b78497235394" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "256182df76b0dd991065bbcc5eddba20" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.3.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "cc7b84655d1a84f7cccecd31519b5548" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "175a32c435087d57f73c514433c433a8" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.3.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "7969b6bd1627307c2c999df526c43497" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "270054b5b37e1b11f92b50796b1e8a2f" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.3.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + } + ], + "md5sum": "20e9bafc6f40df87415ed5d1da4c9ca8" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.4.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "65b3b9a0b90e420ffdacfe28d3a2e512" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.4.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "bd276f02a30e0b0ba3cef20bc120aba8" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "8e16dc11466528b9070f0aefef610ddb" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5ff7d50f1d9fdfaaa921ece84e4b60d2" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "da602082ad3cb81a6fb6d448deea8a2a" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "d7f4404958ebc4030269b7c722df1588" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.4.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.4.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28311552 + } + ], + "md5sum": "dec5ccccc71d25e094ed9b4690c2ae94" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.5.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "a561eb4820f06e6867820e7f160a8bea" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.5.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "1f46a83a50b5d528f676ded352221a06" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "e9b81e08ce91a5fe4454410903ccd0a4" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a90216d5c51ae158f14f0754e3389d8f" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "aee0323275395b86ab3239747354d600" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f8a8e0fcacba6f035b18427a82acf470" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.layers.5.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.5.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + } + ], + "md5sum": "3aa3ca8b8247561f4c9cfc797fe93ff7" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "38bdcc512a19fc28f1416f09debdf7e2" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "6ab0f421dc2298eb0b2d2abb43139f2e" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.30.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "b31ee355cb774201550ebc1ff303c79e" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "a9c299554fb451b9b1277cca0ca13e51" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.30.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "67da15ff0c92dce1cdb4d95eb503fac7" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "266bf35871c1889697b2d314bcd53597" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.30.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.30.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + } + ], + "md5sum": "4889baf39475cfb32a93512c397f99f0" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.31.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "13d6d7ea523f6b47520579f6c85b0575" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.31.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "2e83a86ca989e63cae0d463f05888a8c" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.31.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "b293c034bc8aa088b7fce32c4578882f" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "945e19d74838c0d4c700e83029e71e71" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "56c46b19341bac2fc72b995657e757fd" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "7dd0d96ec1815a55ed2c587577a59774" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.31.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.31.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28311552 + } + ], + "md5sum": "9d84edab70c0ca473c0d4e08c0e24048" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.32.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "f1ef298a7dae5fb550148b98687d3585" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.32.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "c4e3befcf15fcdbfa2f45a803bc6dae3" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.32.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "08bc2517b73a1ec3aff7722d1f8e64cc" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "46eb968739cef3f8349e6eebe45717cd" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.32.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "00252f4ce1d302b5d184a804077d8ace" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "be2e70ae1191e2e5b1d987b51b756094" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.layers.32.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.32.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + } + ], + "md5sum": "571476198667e3c2ed12e6a14dbea23f" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.33.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bb7cbe407ab525852f6e49caf658ebb3" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "fbc57d00faa34815fd241d3c708b61ad" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.33.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "af68e5967ae6fa81d9a4e6b42bdc68c2" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "73bab6601fad81d91a11848a9032b208" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.33.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "c00efba3d4f9afb991d04bdbdc3d73d3" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "60ba21254300f2ec7123ff7c47e9a870" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.33.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.33.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + } + ], + "md5sum": "cbf8875735c9b1cb6660c9812b23f59b" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.34.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "a6923835ba69dc280e8511e2d683eea2" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.34.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "b9407c6e4501f3cdeed0be81c1ea6064" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.34.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "ef2ccb684058fe655abd337a6bfab9d0" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a8d36e55399e732e826f11ae4ec5e851" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.34.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "563be7914aa22c8be3c246a43c0029ef" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "1f82d59e84cbca683b76c54b6fa0a98c" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.34.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.34.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28311552 + } + ], + "md5sum": "3df8e982ffcefdfaeb9cfb038a799134" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.35.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "1ab22de5551e6af2df2315cf6d0d1167" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.35.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "a8c232a082f55b472dbcb12b7e493171" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.35.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "4b8e46cc732669bdc6061632ea9ef405" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "63da5e5c89b84bc8f58465f62b6f32a1" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.35.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5026084039b948a7e22d24908a4f6dbf" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c9214df1271b9cd306085cd3494b973e" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.layers.35.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.35.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + } + ], + "md5sum": "35bbaabe89bac88dadb9652af8da4487" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.36.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9f6414641142cd0cac7441c4e731b0c9" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "f967fabdee8ec14f4cebd583498b8c40" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.36.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "aa00972926007b4df90826e8d9f3ef1e" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "17031ac9726479211013d22e99a4b7bf" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.36.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "09f696fc0d957db534a1d0c6d24684cc" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "66afc062efcf65b868f713cf2627cadb" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.36.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.36.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + } + ], + "md5sum": "857f114b60b3b8f20441154d601f884a" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.37.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "3fa66bbbf904e58e596908ffa6589873" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.37.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "7f6b04af9d306428aab8676d0e740ac7" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.37.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "324c66d48e6d100ba90f19765c4cdd3d" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d9fa4efd204707827e1fd29ac68b39b7" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.37.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f162529c939d1850a8df4d2a2d23f8f0" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "b84bd3d28733275b88fb2130ba0853ad" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.37.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.37.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28311552 + } + ], + "md5sum": "68f75c56c6fd6d34e64e7b1e0f72212e" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.38.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "124ad70e3288ebca68279329f73861a0" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.38.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "9c342a4d8fdf2ab4f00efb2d84a5b727" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.38.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "906596a1f2c23f7a8792d1a515bd42b4" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "51f375558eba2a1947cc30047748c5df" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.38.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1005631fa756c00cc8f3a029bff7775b" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c768dd345e9f0542f238afb6a626fc0b" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.layers.38.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11534336 + }, + { + "name": "model.layers.38.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24117248 + } + ], + "md5sum": "089757ed552e9b43f0b839cd1b08b022" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.39.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4ffdf8517fe9c8f58458f344a1438e29" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "e7267130507f36cb7af083469bacaea8" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.39.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "8fda46b6e82fe362b06f2edde57ae524" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "f9c30ce50cfde57ce994555b8fed164a" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.39.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "008e2a8740e6145639b9ef0c702df4ff" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "75081ae45c2d2b15d25ad519248a3163" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 23101440, + "records": [ + { + "name": "model.layers.39.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.39.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23068672 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23085056 + } + ], + "md5sum": "711cdccad9028f6975f529dac477b404" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.6.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "a561a7f21d679d224a2e8de6f93f62e8" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.6.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "a268b53595edc8915bd355593bdd1e65" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "287969efcf204344fcd9a15526eafeb3" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "252163a4d71154a4f05fed13aee90ae8" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.7.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "4426a827bdb98e0ec16582f638294ad6" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 23085056, + "records": [ + { + "name": "model.layers.6.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11534336 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11550720 + } + ], + "md5sum": "84045f863aec7fd1f43750cb26732bee" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.7.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "5880ade62e34d7cce4793e1f345121be" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2fc8759b704bcf9eb7d038684d9e7013" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.7.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "023c7d162f6c54dd163eeaec98f7abed" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1ec521b2b9feb69800be41703b1087b2" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2816 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "f60a461828d2977bbe620437c79d50d3" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.8.mlp.gate_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "dbea397fbfeb694cc148d1096c97294f" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 28327936, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16777216 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 704 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 16793600 + } + ], + "md5sum": "0d5349c354925f5b442513eb16724417" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 92274688, + "records": [ + { + "name": "model.layers.8.mlp.up_proj.q_weight", + "shape": [ + 22528, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 92274688, + "byteOffset": 0 + } + ], + "md5sum": "bb92de0a6cd0e564b14c781e4e8491e4" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "473062a2cb2f45e845278b58c8b1ac0e" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.8.mlp.gate_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.up_proj.q_scale", + "shape": [ + 22528, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 11534336 + } + ], + "md5sum": "ca862c3759a36ba30e529635383d4c13" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "89b53ca5f34ff792d316d7036bd5f486" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 24576, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "bfb0bf807caae04ed49b831e1d8979cf" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.out_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "20ff683eb65ece3b8b00204e0fc9a7a4" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 12582912 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 24576, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 16777216 + } + ], + "md5sum": "1498527f4562fd9d2dd9691346bb4597" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 4194304, + "records": [ + { + "name": "model.layers.9.self_attn.out_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + } + ], + "md5sum": "f8149cf91f163e76b17129dbbd6f3aa4" + } + ] +} \ No newline at end of file