{ "metadata": { "ParamSize": 443, "ParamBytes": 19677200384.0, "BitsPerParam": 4.500110418416715 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1048576000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 256000, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576000, "byteOffset": 0 } ], "md5sum": "90172f952317e27f8f3eadb8a833b298" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 131072000, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 256000, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 131072000, "byteOffset": 0 } ], "md5sum": "a15f399df006c9452a99a41845042bca" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5f738e519bc173b908b7cd18431c29e8" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d32a88c4601b719449740f9e4e0ec941" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "fa29ce568dff5f9093b97edb9a4e42ee" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.0.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "311ea3067bb11bd40f2fb3df606135b3" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "0bf412fdfd72cfe1bbe6c3e9babc5fe5" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.0.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "daaa92d0e8d1c22da6c49bca7992bcdd" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "52ed1c455e6231bb1d81efc8ea1ccd20" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.0.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.0.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 } ], "md5sum": "74bab5694c593c28b3305c6f25768503" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.1.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "32b73b10039081fcda2e8f03ead6841c" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.1.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "4af8ca8a3cfa630d59f3f21bbf769681" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "eeedbe99ab0d2825106928c56b3a36e3" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2cec07be2d0ca19654e2d98fa7eca397" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "66a5b3602033eb9a585e85c9c9430520" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "c45f3c013b5fa9bc1bd2dca375ac79f3" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.1.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.1.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28311552 } ], "md5sum": "6df0a68f5dc645329ec73c6f53dbec14" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.2.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "c48b9cbad61ecfde89049ccb16ceeaca" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.2.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "df885a3f12a59d21a57b271cd425450e" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "8a43afbc46b4f47643b80ec2f270aea9" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a89395d538137c2626af121c3e309327" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c0225bde45c596864953614f93c7c4b1" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "08b2f4ac06f8ae1ec6f75acf158bb21f" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.2.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.2.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 } ], "md5sum": "78cd23ae9f746a2cca8764a440006b68" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a440a398a39fccfb6bc529d1c5238962" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "ea9073c5f186702d27103febbc03b2db" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.10.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "cd6000f8526794813876bee9d86854a2" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "2dcdc81c2dc2c48166f0f312265177b9" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.10.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "3e0eada4d26c11bf3ecc02c90371a904" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f610ece4bebf2bda87662f1646b748c2" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.10.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.10.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "f923c1c3c2a9915f5ae3fc13f2f74d81" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "174ca52dbbff01c7ec2c9b1a251447cd" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "ae39caa4431c35187ab3dfc0e60c328c" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.11.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "08e177080ce65b8e4ada20b0ff6340be" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "491c25bdf1f29a3ba8cc469171e9b03a" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.11.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "ba51df0a15d17f72950c3f8508ff6d4f" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ee158e48976b0a4f752ea621af2eefe8" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.11.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.11.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "2e678b14a9b535566e914ce283aed8c5" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "630bf903d2b3fc6792fcd06a0fa8a023" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "cf1807b062ead015ab356cb5d10162fe" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fddb099a7a603f5587aaa0f1e4790bf0" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16777216 } ], "md5sum": "96b454d533e8e46e1e54056186bedf15" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "a53f4c8cb46520a53c480c2e4fc4422a" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.9.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "a407472cabc23f09e6e7a18e71ee4f7b" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.9.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "00a87753b6dccedf6474bc210fd683f6" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 27279360, "records": [ { "name": "model.layers.12.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 4210688 }, { "name": "model.layers.9.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 15745024 } ], "md5sum": "ba44f9f5726fc5788335535666d85536" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "062bc39233f4b393902539914ddd7bc1" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.12.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "6319e6fe2d4125ef4ae4f4710900f957" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.9.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11534336 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11550720 } ], "md5sum": "2267d4b075b04461331af478345a3dea" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.12.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "bafe82192c047fd83aa610ff553e69f2" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "6a3183e9a24b5023975a66566bafd1a5" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.12.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.12.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 } ], "md5sum": "b0e1e37032c26cae6fb5a4e90104b984" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.13.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "a26e02288d9f92793fb54db5686dd073" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.13.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "b29a2dc235c83c0f6a2c5bb1790bed3d" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "1016bf9c830979eb5669cfc1f5f127c7" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "352b92942fa213287d31bc1862a93db6" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "02b433bce83998d618651e83b25798fa" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "763ee5cc10d9a37460f28283e3ba9cbf" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.13.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.13.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28311552 } ], "md5sum": "7eb56b013d3fdd90f11add34c1b8d2b7" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.14.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "c0c8021a97ddd046df310f9bb7d8288c" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.14.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "10096f76d7daca95fc166815ad11cc0a" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "64ac37c99647869c9e1172e3eb66416d" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "470f0656a1c4844e6c129d0dbb964646" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1ab87bec28f54e7d0d1e20575384b602" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "78648b6d6155ad08fb1fdf4283aa297e" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.14.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.14.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 } ], "md5sum": "15f23ef4f58a92efa7b004a76135e3cc" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "55a5919c45aad0a3e596a7ff042267a8" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "0f829762b8fde5110f5752e3fe7d0130" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.15.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "7aa51b0926aa39f81bcf6914c42a3b62" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "9f496910a366614141673d03c9dd2123" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.15.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "7c1166c80c716cf5641ca55fc580d36f" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "6ef361e0c26558e693dd0e685d1ee44d" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.15.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.15.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 } ], "md5sum": "37fafbfbd22d0e594fe30b20df7aee22" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.16.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "233989fcc58cb1859669fecc9ff30b76" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.16.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "a01d3a559901adfc0b0be9edd3a1a912" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "a01687654d114f164f52d4a3d4455881" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8912ab6c9e7350b5d04b7b3ef616a059" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4b3fe93a9b35a0d71991ee02d660129f" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "561b943391c70eddbd4609c269e46ec0" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.16.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.16.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28311552 } ], "md5sum": "13a29c66235bf9e269b567f9709643e5" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.17.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "bedf8a29f0721e3ff9cd6177a2de886c" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.17.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "d3c1813501e3470778ed6134bf11b4a4" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "8a7e46eda9b7c0f50579fbf93c399b19" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "dd16d8ab1df70f9cc620b8006fccc1e7" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8f86312eccd96a065854a168d311b245" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "bed53826334c452fad86027a2931b44f" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.17.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.17.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 } ], "md5sum": "8672850bd802f6fd85a32098250c352c" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8c8db04554ed307105c602f2b271e8a7" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "3a91389aa1454d55dda240ef2c173f47" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.18.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "82ef8d516e30cce3f970671ca1bec8c6" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "bf191b63697d6b3083dbb46968716d69" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.18.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "85d7710d9b5942418510e8e8f73ef832" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "cc0ac1879d0e7a85df237b0670686edf" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.18.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.18.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 } ], "md5sum": "825a0f1ce346e132789486bc7cb7caca" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.19.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "c6b546bdf400890f12f4935854083b45" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.19.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "ad162ab66314c082216fc64b7a2c05cd" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "57efd0f4890720b8213461cb7f8a0170" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b102d4399c024d170e85eeb1c2e1e7aa" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0a566b2b7b6e50182ee48c2a90486454" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "f52fad0ae62ba3ebb09d167fe36cabf2" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.19.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.19.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28311552 } ], "md5sum": "9b7f4b8b317951ab17672f451e837413" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.20.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "30a3f1200ab76bb1c2f7ebfab14e2b1f" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.20.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "62b86d325dc23bec391aeb05ff401b86" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "5abf9fa19d23c4b07f056c00cf6aac18" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a30c4cd40986f9bd34d0cfdd9cf744bf" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5f07898d671ae8a24a05e70fc93ad3c9" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4e914448c2897af9d820f219e27c8e94" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.20.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.20.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 } ], "md5sum": "d594712b5570cb29e643ac8d7da02417" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "98d4348e76939438cc920909874596e9" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "8a650003d99f8fe513eae3744fdc3f47" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.21.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "8a0ea757d8c73c6e6d7acd2c0426f62b" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "d9a7f585878eacce5f201612bafa3ef6" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.21.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "79d35abeb80622d8cb280d0e444d56e7" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "6eb9dab516a6f8112295ab675ecb341c" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.21.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.21.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 } ], "md5sum": "a7b56a8dc7197c45c6d214c1c0329fa2" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.22.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "f4f80aa9e2358907d5a36165d5f69130" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.22.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "385c7cca884f9ead93cec68cac786429" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "edd6b2bc08e6049c2ae0bcbe666e7a15" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "65a934861ea4c69caaebbba00182032c" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c7f0daf6eccdac9ba3af45a3cdc2feb7" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "916de262e52897375e11be94e5760ba8" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.22.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.22.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28311552 } ], "md5sum": "c70e20ddf27e185d3907d9d3d92bb5b1" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.23.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "5e4f51bc86e941200c808b780a29fc3e" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.23.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "90ba3688af96b70188affbe87325817a" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "2b80ae9a04dcffebbc2b3b1d92146505" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "58f6a1143983d49eac34ffdd952fd613" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ddd429d783e9b8579932772851c907c3" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1942776c8c0dab7f9c3027e3943298b8" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.23.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.23.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 } ], "md5sum": "323a887ec171ce72b95c96611d5464da" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0b6e3738c8f8d8baa95c88f389d2dbb9" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "4ea6a6e2ce094336d16dd9ea07939b7b" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.24.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "9edd49bb47d8b7e7ca926f8435999f4e" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "4980ac8e8266e5d381944db73c0305b5" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.24.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "4c7409a377e5ee4f659f9bfabd209e18" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "1dfb4cb0c3ce248d65c75047e4c2d0b0" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.24.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.24.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 } ], "md5sum": "f4feac9dcc6e53d980bca05ccf661a61" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.25.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "d29c60d5faaa8bef98a44bc3ce451bc1" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.25.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "21a1874290c3f3860773326c631d3bb4" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "d0344fb95ceca1890aa71a945a0b3194" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f60b3f6a95f75abd43951d48ce007f88" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "14030af2882cd70601690811867a8042" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "250384cee7b34d8a1b59407e2d552a69" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.25.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.25.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28311552 } ], "md5sum": "8c815231b227f92b4d9a5113e7cb981d" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.26.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "6d570a3123feb214fa7489a6ecb179d5" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.26.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "ced6f37b0b5fd730016a7227d3100270" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "b6ca95c5ac16a41d05697ba6dfc56fb5" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d1ea5c60acd533f8e46b591087e8eef5" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ab5e725e583a76b03849a1dda254e613" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a494a30b8ddfcf1a9a6278f64b573d1a" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.26.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.26.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 } ], "md5sum": "4bce08f6e2ed28357955a324ce2feec9" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e05f01861de1248d44f182229a1215a6" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "8705c0f10d39579b358d32370a823b9b" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.27.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "51b5ff4d83d971c3361238c94607069f" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "e93fc5a370fbee2beafc33173c2e2206" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.27.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "08a4e1432bf406cf005b451a5674cdc2" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "01649d01d47e2e04b8a4fa8bae8ef674" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.27.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.27.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 } ], "md5sum": "f5f64bf567551b9e2e3912be3b4c81bc" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.28.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "8bd169b00ae7def01c1b322c716cb22f" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.28.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "c0ec7e56d0728db56e158817a45f53b1" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "5623df5e01446c861eed9368a96cfe4b" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6557beb113740a47bb0e6f87b8c8b3a3" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b447a9f2bdf813740ce910ef65cb51ab" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "3e4ad4b17cb80cc056c746663516dfc2" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.28.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.28.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28311552 } ], "md5sum": "4db448f55aef5079bb3b972dc1eda472" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.29.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "904a623a956706aa0fa5af76bd29ced6" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.29.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "8544ec70973bd870422cca06d7e4322b" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "3612c1415cfb5711e4a5cf4bb05b7215" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e6dfd82bcf8854b5763e6df6e722b1ab" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "19959f8c41c7bc1bd734162f7962b800" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8ee30a2e8c0ea1dd759ab9a1f6647579" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.29.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.29.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 } ], "md5sum": "d55b8cca2b07feaebeeb39efeb882898" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "decf42439c00a38540b0b78497235394" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "256182df76b0dd991065bbcc5eddba20" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.3.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "cc7b84655d1a84f7cccecd31519b5548" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "175a32c435087d57f73c514433c433a8" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.3.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "7969b6bd1627307c2c999df526c43497" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "270054b5b37e1b11f92b50796b1e8a2f" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.3.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.3.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 } ], "md5sum": "20e9bafc6f40df87415ed5d1da4c9ca8" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.4.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "65b3b9a0b90e420ffdacfe28d3a2e512" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.4.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "bd276f02a30e0b0ba3cef20bc120aba8" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "8e16dc11466528b9070f0aefef610ddb" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5ff7d50f1d9fdfaaa921ece84e4b60d2" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "da602082ad3cb81a6fb6d448deea8a2a" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "d7f4404958ebc4030269b7c722df1588" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.4.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.4.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28311552 } ], "md5sum": "dec5ccccc71d25e094ed9b4690c2ae94" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.5.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "a561eb4820f06e6867820e7f160a8bea" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.5.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "1f46a83a50b5d528f676ded352221a06" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "e9b81e08ce91a5fe4454410903ccd0a4" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a90216d5c51ae158f14f0754e3389d8f" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "aee0323275395b86ab3239747354d600" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f8a8e0fcacba6f035b18427a82acf470" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.5.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.5.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 } ], "md5sum": "3aa3ca8b8247561f4c9cfc797fe93ff7" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "38bdcc512a19fc28f1416f09debdf7e2" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "6ab0f421dc2298eb0b2d2abb43139f2e" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.30.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "b31ee355cb774201550ebc1ff303c79e" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "a9c299554fb451b9b1277cca0ca13e51" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.30.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "67da15ff0c92dce1cdb4d95eb503fac7" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "266bf35871c1889697b2d314bcd53597" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.30.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.30.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 } ], "md5sum": "4889baf39475cfb32a93512c397f99f0" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.31.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "13d6d7ea523f6b47520579f6c85b0575" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.31.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "2e83a86ca989e63cae0d463f05888a8c" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.31.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "b293c034bc8aa088b7fce32c4578882f" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "945e19d74838c0d4c700e83029e71e71" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "56c46b19341bac2fc72b995657e757fd" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "7dd0d96ec1815a55ed2c587577a59774" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.31.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.31.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28311552 } ], "md5sum": "9d84edab70c0ca473c0d4e08c0e24048" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.32.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "f1ef298a7dae5fb550148b98687d3585" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.32.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "c4e3befcf15fcdbfa2f45a803bc6dae3" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.32.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "08bc2517b73a1ec3aff7722d1f8e64cc" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "46eb968739cef3f8349e6eebe45717cd" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "00252f4ce1d302b5d184a804077d8ace" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "be2e70ae1191e2e5b1d987b51b756094" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.32.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.32.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 } ], "md5sum": "571476198667e3c2ed12e6a14dbea23f" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bb7cbe407ab525852f6e49caf658ebb3" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "fbc57d00faa34815fd241d3c708b61ad" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.33.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "af68e5967ae6fa81d9a4e6b42bdc68c2" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "73bab6601fad81d91a11848a9032b208" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.33.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "c00efba3d4f9afb991d04bdbdc3d73d3" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "60ba21254300f2ec7123ff7c47e9a870" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.33.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.33.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 } ], "md5sum": "cbf8875735c9b1cb6660c9812b23f59b" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.34.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "a6923835ba69dc280e8511e2d683eea2" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.34.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "b9407c6e4501f3cdeed0be81c1ea6064" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.34.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "ef2ccb684058fe655abd337a6bfab9d0" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a8d36e55399e732e826f11ae4ec5e851" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "563be7914aa22c8be3c246a43c0029ef" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "1f82d59e84cbca683b76c54b6fa0a98c" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.34.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.34.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28311552 } ], "md5sum": "3df8e982ffcefdfaeb9cfb038a799134" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.35.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "1ab22de5551e6af2df2315cf6d0d1167" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.35.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "a8c232a082f55b472dbcb12b7e493171" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.35.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "4b8e46cc732669bdc6061632ea9ef405" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "63da5e5c89b84bc8f58465f62b6f32a1" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5026084039b948a7e22d24908a4f6dbf" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c9214df1271b9cd306085cd3494b973e" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.35.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.35.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 } ], "md5sum": "35bbaabe89bac88dadb9652af8da4487" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9f6414641142cd0cac7441c4e731b0c9" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "f967fabdee8ec14f4cebd583498b8c40" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.36.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "aa00972926007b4df90826e8d9f3ef1e" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "17031ac9726479211013d22e99a4b7bf" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.36.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "09f696fc0d957db534a1d0c6d24684cc" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "66afc062efcf65b868f713cf2627cadb" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.36.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.36.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 } ], "md5sum": "857f114b60b3b8f20441154d601f884a" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.37.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "3fa66bbbf904e58e596908ffa6589873" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.37.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "7f6b04af9d306428aab8676d0e740ac7" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.37.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "324c66d48e6d100ba90f19765c4cdd3d" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d9fa4efd204707827e1fd29ac68b39b7" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f162529c939d1850a8df4d2a2d23f8f0" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "b84bd3d28733275b88fb2130ba0853ad" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.37.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.37.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28311552 } ], "md5sum": "68f75c56c6fd6d34e64e7b1e0f72212e" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.38.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "124ad70e3288ebca68279329f73861a0" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.38.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "9c342a4d8fdf2ab4f00efb2d84a5b727" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.38.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "906596a1f2c23f7a8792d1a515bd42b4" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "51f375558eba2a1947cc30047748c5df" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1005631fa756c00cc8f3a029bff7775b" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c768dd345e9f0542f238afb6a626fc0b" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.38.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11534336 }, { "name": "model.layers.38.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24117248 } ], "md5sum": "089757ed552e9b43f0b839cd1b08b022" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4ffdf8517fe9c8f58458f344a1438e29" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "e7267130507f36cb7af083469bacaea8" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.39.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "8fda46b6e82fe362b06f2edde57ae524" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "f9c30ce50cfde57ce994555b8fed164a" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.39.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "008e2a8740e6145639b9ef0c702df4ff" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "75081ae45c2d2b15d25ad519248a3163" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 23101440, "records": [ { "name": "model.layers.39.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.39.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23068672 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23085056 } ], "md5sum": "711cdccad9028f6975f529dac477b404" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.6.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "a561a7f21d679d224a2e8de6f93f62e8" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.6.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "a268b53595edc8915bd355593bdd1e65" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "287969efcf204344fcd9a15526eafeb3" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "252163a4d71154a4f05fed13aee90ae8" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.7.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "4426a827bdb98e0ec16582f638294ad6" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 23085056, "records": [ { "name": "model.layers.6.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11534336 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11550720 } ], "md5sum": "84045f863aec7fd1f43750cb26732bee" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.7.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "5880ade62e34d7cce4793e1f345121be" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2fc8759b704bcf9eb7d038684d9e7013" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.7.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.7.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "023c7d162f6c54dd163eeaec98f7abed" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1ec521b2b9feb69800be41703b1087b2" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 2816 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "f60a461828d2977bbe620437c79d50d3" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.8.mlp.gate_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "dbea397fbfeb694cc148d1096c97294f" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16777216 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 704 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 16793600 } ], "md5sum": "0d5349c354925f5b442513eb16724417" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.8.mlp.up_proj.q_weight", "shape": [ 22528, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "bb92de0a6cd0e564b14c781e4e8491e4" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "473062a2cb2f45e845278b58c8b1ac0e" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.8.mlp.gate_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.8.mlp.up_proj.q_scale", "shape": [ 22528, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 11534336 } ], "md5sum": "ca862c3759a36ba30e529635383d4c13" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "89b53ca5f34ff792d316d7036bd5f486" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "bfb0bf807caae04ed49b831e1d8979cf" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "20ff683eb65ece3b8b00204e0fc9a7a4" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16777216 } ], "md5sum": "1498527f4562fd9d2dd9691346bb4597" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 4194304, "records": [ { "name": "model.layers.9.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 } ], "md5sum": "f8149cf91f163e76b17129dbbd6f3aa4" } ] }