{ "metadata": { "ParamSize": 805, "ParamBytes": 36381212672.0, "BitsPerParam": 3.4891900844756822 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "lm_head.q_weight", "shape": [ 1024, 128256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "3b0091e9675cc30ddb562de5cb1cab5e" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3a43a86822c5e49eaf8595148ddfdc34" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "69e0fdab069fb773fedddb29fb22b68c" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2d46c9826b05e7a1a0e279024349ef76" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b8b996ff6926b0d0dece216fae944db0" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31145984, "records": [ { "name": "lm_head.q_scale", "shape": [ 64, 128256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16416768, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16416768 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 16433152 }, { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 20103168 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27443200 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27459584 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27475968 } ], "md5sum": "acbf98c398c7c7c6a7d589798af9a02e" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "03e8049c5efae78d74a6873212c47400" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b2fee82bc2d3464527e6955701c04db5" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "3ed7d852262933b734b9bba319aebb07" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c27430a98147150a548ee3cb6501fbdf" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dec9d07681f193e5710ae63173651aa0" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29835264, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16416768, "byteOffset": 9732096 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26148864 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26165248 } ], "md5sum": "485028e71938369726372cc7236b8193" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d4cc8135ba9815fe19f8b4123848cfe6" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a381a980eb9aa939ef1cc8ac02e9b9cb" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b8d402908c98802d22bf64726dd267c3" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a2463d47f75b41e1ec85296ed8c639ed" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7c1421417fbb514c97634adb9b0bb3a5" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b69df290d68d83a27dab3033d8595761" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cd3db516ab1e6a0b7d147b79c4644223" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a87c267bd82f65c3049189d2de58e550" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "036c34cfcc4488495405970d6f57b055" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e5d9aeb590424acdd5d7a655ce9a970e" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7347305b0555b91f4204c548410439c9" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9c56806f024ffb8d107cbca09ee69e3b" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1ef97c8e65548bff191cefa15759d527" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "eb8ac00a7f5346a97dfa7a372b277976" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9661dbc7c82ccfa14168279e0ce6cfc8" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5e055ee66ae61e2d7322b296dd2665a5" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f92174821315fec7ff46b7f6fb78baa8" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bce36c27c70d0769ceb90af67446de1e" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "541b711de627ec9b5a2e57ef5ee6d76e" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "b9ac5d48eff64e9479b98a080a101a1e" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "49fdd1a0ba5fc5b19880417815e9d6da" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f0883e5acd98e70b7ce891516ddd9286" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bc26b1f52db76cda3ecd3978d2ddc0ef" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d6fd046d58bcb4a7d2ba5066a445bea4" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "175c740c5eac48f9c9284411b351769c" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d1056b8baba677309d79dbf195fb3e54" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "694c89aa4961a35a721b12d02c81ecc0" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c8d51bb1f739bca3ab04b48850064948" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c1cb588ff6ce5768225fddde59b9f661" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "74f99c1bbe2ec0c1af1954e44d64b4ad" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 32833536, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 23117824 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 30457856 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31768576 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32817152 } ], "md5sum": "67c64e9912a8c17fbfb90b6eed268cd6" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a85aaa64f233af4409ad4a7a65b0734c" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "52d022b7f63ce5fc87b3dec3a7433288" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b87b6c72703a03c76e4b4356970943ea" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "01880e9dac55abe325c0a5df33db3544" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0fe6182f6b9d7ad46cfcfa156643a180" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0886b0505cb2808ef5896fb170087560" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0b3aa94305646f5cd1997362ab4f73dc" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 28147712, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11042816 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11059200 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11075584 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14745600 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22085632 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 22102016 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 23412736 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24461312 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24477696 } ], "md5sum": "a69b1a00e0f03670dc8220472e6f3a2d" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2eb895ac057e6e8fe293a678b7cb8fa7" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b17f9a9fa8e7fd1797c06c179bfcd727" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6ae3460edfd8f0117090f9dba1f17bac" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1172c849d7202f3c134613fd219d0138" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "828357220a73cf0c40931fe5ffff7250" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ed046ccd9a0beade388797eec274b253" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "44e4b485206e24956754b605be86977a" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cae21301a85c2384b6df3d66c3589a74" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "bbf46a89c4230c48976ef5ee1282bd19" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "04aa0add0f01ea34883763fb0d34210d" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6ce2eb2b5f3e3f3ace4a5e75ffdd2b4c" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1ca3b9357fb19f8ff54b085236cc140c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ca2d034efb249fc7b9abba04d959ce9e" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "81d48e1e95ad28bd9d66c4ac215a534e" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5a89c14712f3d1e0195a396f7d3a6c13" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c061b36e640a648713bd13b7c1c7e135" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e9c1bf5bb12e5723c24c5871b3f59e1b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "10e6a87d1d87df654e3eb837d38cc46f" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "4c63435584c97a2ae0700824ebdcea92" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "652b557091be2132ace13fbe7be14cd8" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4310066876c6744c756f9dd45d0aaa5b" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8516eccbe851c7f3919002979c68dcc7" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "67a9a5d6171b183c241e5a46e6e34912" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a45b6d0301e3b0d7e04a93eff939aa19" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2cd4c12e41fb4a0e157c29c1ea22880f" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "55b47ae04aa85e82e0cf489e7bd8f773" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bf1296bd3b4ecdd56320b015fe0e362b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 32817152, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 25477120 } ], "md5sum": "bf322f6875acb082b4b0d624d1bfc704" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "04a170dc8f130c80b8bb764e83c03849" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9b7933859fc129c7f04ed8a37a5bb3ac" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "80d9675e1682207de30d286b168d4da6" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c54b8e34eb619cc2709fb3b195cba96b" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ae3ba30a3f2b0986101d7850c297b43a" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "46408d269c5de476d67e677b3c89c956" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "719d85a8ed48b5bee9004b61b338275f" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "593142af28b802520ab5d29217eeebc1" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ab4297fd143cd96b14255be05f4e5716" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c0e4344ec700186a1e6559da04397e1c" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 29196288, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1310720 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2408448 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6078464 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13418496 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 13434880 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14745600 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15794176 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 15810560 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 19480576 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26820608 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26836992 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28147712 } ], "md5sum": "57e78e21784a0a17ce584773995012af" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "86d953e8543a95ac19950e5e9caa51df" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c88ba27a47a309f8ff7e74bd9bab63ce" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3431f69ec8cbdc5fc3d645e354a04380" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "749ed7fd8fa09d109ac71a3af4643651" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f0f444c81f4d0c8288e09001e3b2ccda" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aeb555ad521cc355cab33cfd49b774a1" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bb17f34ba42366ccffc4cba7b1c530b7" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3f3fc27fd133177eb52081abbeb963c1" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b4615e6816db6a422151ead0fe98a707" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9715712 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13402112 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13418496 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 17088512 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 24444928 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25755648 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26804224 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26820608 } ], "md5sum": "eb0bafef76c8f18029be3a589cca368c" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3e47b974cf7990f3fbc90abbeb1e73a2" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b8422de621108f2b545bf180452436b1" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9c1d45f66b3c2cfd48730d2340d836f0" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3aa2f8851b9a21e8089990b789053fa5" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "74404dfcf00c246456f2333d9eb0c768" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "95d8cd3fa7e0f14426b1ba90c90503b0" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e266f5501698a5542a59ac8a3ae6853a" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7dfac806010d7a40f395765919ca4370" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "e73ebc2b41247de3f421ffcb4e21aeb8" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "88ab8ba685f09ab7d0c05272d05f76e3" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3ea76549bb97e5a8f9e20ab5629fd4e4" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cb1d511719e48fa0cc1050e9cb9fc7bc" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bcb5389c1781c3ea1c3d316164be1c23" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "49694811695f6d8d07acbf745fbbf6d9" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f8c72d8a52ee6dd7c28d259edb3dd52a" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "617411c28b1acd2064c2bf597794561a" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b0b5e9501e66d4a443a580d958ec912a" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c7d5575751a1f8e380705706b7da5d32" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "b6e8805c15a404af36fd8afebffab72c" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fa879caf8971c0880e1bae3193383efe" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d03b3ec131e14dbe56cc23ea829607ec" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b81d88c3e11ada987ba32a714fda2e48" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ab8ae63b5632f24088925c9b7869adc8" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7e628005783bd068c4de7e7d17becdc2" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bdfcd3062d1772bb96a2f66d3514736d" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4b622b288dce74f0f4ac6ec54eff7ee7" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f92a2025c0dc7786462649299a023b92" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 32817152, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 25477120 } ], "md5sum": "e44688afe376be68497776234b712e3b" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "960d1bf66f4e8ed617ebe28dcd77e53b" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c805eeac496fee381e4145ce21ef3ff3" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "03be1da585b197e663765e0715e2ca3a" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9267c9bf1abc0cb18bb365290055abf7" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cd8d7f411d046032c91fa27ac069ce20" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ce094114dff3917f2662e2113c27ef5c" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "02dcce3f6f85e41e3f8912a494626a7a" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d1b4d67c3e1719c09eabf75ac547014f" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c715844daca04d67bbdb5cdcadc31dfa" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "23a011402cbe0b9919799a67d31c8a63" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 29196288, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1310720 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2408448 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6078464 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13418496 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 13434880 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14745600 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15794176 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 15810560 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 19480576 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26820608 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26836992 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28147712 } ], "md5sum": "c76bd56cf6f7a922163a3e8a17d92484" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b8ab4622c891d23c2f2aaa06790983f0" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4b696222d19874aeb236b767c20c6f80" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7764d3a5d6e9687402f84e5d18bea54b" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7665c5653cf92bd3f1e2804780267fc2" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4cfc9567b3cacf290736a5cabbd66d9e" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "46b83ddea07b1bb79b4944a2874c8799" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a46473baf0590771f06d727e81161763" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a6f8f9c301c99ba912ea0844d942f5a5" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e3e8857cb9a936ce73fdf61c8f20e3e2" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9715712 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13402112 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13418496 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 17088512 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 24444928 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25755648 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26804224 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26820608 } ], "md5sum": "637fb80c9fb3b166caf9d8b333243d50" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a70afba27105890e8fd3a62f711756ee" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "23790761c69e6dfd71e77573dd7113d2" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "906d5ba86df0ef0a717fe909d5ab99ae" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "66d08638d6d38c979c05d818a470fda3" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f40673e186d9ff0cbb5e9124fdb13c0b" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2d5e4c6bd231911e0d7b9ca5bb7a92d9" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7df31eb7e06f531cde96ff0d373b7579" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "573bbe510113005913a6622438662912" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "1f9f30c41fcd71553cbbdf9437d1856f" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e19c774801bfdbddd30bd4879fd5904c" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "551a2f11f7dd057879e4b97bdf5bacfa" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "389caad95e660bd195f4300828966d1b" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2efe84dc30d56d6c803d2ead2fec8ecd" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "59704f8532bfb212f024c85108177294" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "aac9a8b8191dcc462f43a7dd3afb42c6" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f07758ce8b2c03a29932bd384d98b49d" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c77a4ed5837c1d6d0b2f4bff5bcbde24" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3eed40f96bec1b7dfc1b301691ff8008" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "e89770b7051eac5bb65b902e5c6e2567" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6dcac02a5e0aa661eabde1a466c076e3" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "eb025a588000721f201deec106530043" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2a04fd19e8495cfdf0a1b37770b44d5e" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ec6e8977f9a8cb616ee6b77f23e137d8" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6b0f8d0a7f02148359fd0c4ecd086c8c" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1a6c6cd6b9a436f2be3c21c5584149ca" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8b3469d66d22889a29dc475698a47d03" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1ba3e7104f7363d26bffe17cdff05838" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "f56f3260fdc5dc4cbc127fc01d7385b6" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9c6fac4eb20ed805343425a32ee6b27d" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b18cb2b436b6375f95cc8f8f0c435e19" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "927b0dee84862a866a4f2b23262d5560" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a6178a5609c9a18b6c3f99697d161236" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fc1c3fb3409c8eda74db43e6d29d51b8" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0440764f2f8c42ce96970fc5d87f7656" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7ad76b37db7320f61509f9ee1a7d368e" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6e43cc201bcfd5bc2428647b76c90ac5" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cbd31ecab5e96f82fc29565e290ad19f" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "543a55b18bc570dc9a4050fd69fe5f33" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "3cd1bd37e9da5fa3098c8f99c9c4e6b2" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "77e01e01a8688e9b1723bbfcd0be9b50" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8f984e7d7823bca09d593671f91e21fc" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1231ca8abf17abda94b59313d07dc0d6" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fbfd73a31e499c0bd00ec90d488cb67f" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "13e4b6263c8d2e2331b49f496513deae" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d65175b971bcce4242192863afc8428b" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d8d0fb2c75f8151c0b285a7479e682ec" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0db85adc89e514a8df0427756d5043a1" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "ed8ae69ca248c3a05d1f10ea6f340599" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bae9b89fff81e05474d786545a72c8ae" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9bdc75d46c274b4c083f2f8035d7c6c0" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1807221e62856eb1c68acacf5c2bdfb7" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cae4df133bef3fe701fb38e0cfcfdc05" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2d4883af7218040b101fb7caad8d7b2c" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9e3635672a75c7459996d45db8ad4a18" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "87d3118a132001567107ff0a97576d61" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ae637f0643db74d5d33274b8703fae4d" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "2473df2b50200702dbabf41c0c028586" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "26c5e15f4818dbd7a148e75180532ace" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cefcac532c7f224bb93dfce7f359c8ed" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "328d1ffe6077a6f6602ff41b76683e12" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9ee4384f122488264237ae923bc0a15c" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "45dd90f8cb3585f9c17ae7b2db43ae86" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e9b485ff71aa5836f94fee1dc9c18a8f" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "064c862d24f9139ab23140602687e344" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "89571c1283d3fa41aed0cb6f89e1f937" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f2913e806491efc2e3b34fae46eb8c04" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b10714c2f8a50c57b5e6fb9f273ef23f" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "d9023ad03fec8f6197a5b6bcbcf0e6b1" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "71d2abce2344fdc42b65cfb39132c8a6" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a176f94e38aace96dd2bc7580bdb6eb3" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "62a8dfd5d24daed0c10abec0d76374d2" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4bef63b1133b375aee9acb7ab49d9ea7" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fadea87bf9ed73c094c7b60926823b5e" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d4f08f467b5a9f7c22563fadf4dbf0d5" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b5fe097e8fe79943207e1a5cb21bb40e" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dc1a75478571df96cb3e037bbe1212da" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "06ac6cf63fa5d8d77bffbbff8e906146" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "304bb9e2924a69e34241cba3b4d58001" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d375eefc53a64179c73cd9b189c70b98" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e1b64922433283d2a166d0db3e726789" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fac18edc0bc9f765457b3e9508654abb" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "80cf444988c52b85007c761bdd5f4205" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ee54f36f87906a23f2dff3bd9fdb370c" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d3b30d52521a3064ed50207c1ed2a25c" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "265df46f079ea8d402794e4d5ad66ee9" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6411483eef97550b67cdb1d50d8822a3" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7a1fd90e78e90b93c1a4ee05bf77cb47" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "3f92e3faf393bb023ad4e945085f454c" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bd30d0c37f08aaf09a54cc0bc8ca30b1" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6e656e80977d35a645c8e4ec96f58ab1" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fc41cb29331c0b1faba49f857315a6e7" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "80da4831f49fd014f5655524b23081c9" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bddd91d021d67d98057ab2e96d1f7b47" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "779c675a985ea72b58005d26f0a54016" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "eaec0d38b7ce8d5f0e08de55f9725f91" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "80deca43dcffcc63c9a24741a8b7561c" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "90b794782cd99f3730f4003e066f3ae3" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e8ca505294dd171b309dfd836fe0bad4" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7dd07cd823a9d00292110e981b58b148" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "33b1d1a6f03a3980e8f5b72601ca83df" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d9e138dad41f9ed36962869283342ebb" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f79e992717a1b0bf31cb1f35b0e42d8b" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c51eeab46f0b1ef206970909670ae20e" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "adaa9bc9d1034544767285b345838ff7" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5d71692e202392e2d23321f44955a44f" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "e813150ad4811b3de04a8782a38ad84f" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ceebfd1dc69a6481570ac5d7ca9c1d3d" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fa9d980adad3432c922313fffa80931e" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cf518f11b272fcdb86f838b81698d5db" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0b2c0c6e0744e4906e08c4b7c2b8185a" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d1ff02d99831b26dd9b60cf8a63d6a77" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2b10ce17ce662e777605abb25ac29792" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "af24534cf34b0b56b80f1b52764ff71f" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c3ffe074ba076e79f239d414c4bed5cb" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cc33062156c5ceffadf26577376a270a" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8604a97588e59df2a75b033edb9ffe8f" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "0b04d9e8acd2f2420d4cbbe7efcdbf2c" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d96c65116e5b1c862dbf60269dee6e42" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "167409d024eabb819f3ba07a27e6615f" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b665a6c72f0a01b58f04630d27d92048" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f59fc8ca9d7fbdaeb83c2f23e6705942" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2fc4142091dce26505a1402457c038c3" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4e000e7cf9fb6bef5d82a85160f855c4" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "69702a60d3808989b566621b38bd04d2" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fc26b965d4e921c752bc3dab570f0e72" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "21e2f81cfed76e585caff2495fc8ff55" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cac39031866c7becd4f995e8b8def826" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3bbd01e01ee73c5d8ee492f79ca149ca" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "58eae90e703b9a2f7052c459be04b768" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a9ff11bf82a8aa8da1a7293e4f1b153c" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b46ccd85d1389523a83a790cb4e51205" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "02d4e388d5bfc59ec154bc481fa7e33c" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "59ae56cfefc786d9ffc1602cffc4d260" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "de2088a91a8c25d6117d53251df7bb54" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "22ff80d1ebb9fc9b44a3d1ccbd143d21" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0237f16740ec7a4be1bcff540b8ea646" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "135c9629d767d58fd1b76129072e1f1b" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "89b5b9adcfcb5cea39edccec30bf19f2" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "711fb4fbb804094a03874f2df3cf2b94" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "017c0207e02acd46a2e88fcb180321b7" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "167a47c1f3252f6bae5ba34428421fb1" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7824c4a4f113f51e9b682ba746bf24a1" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "81c1a830c6075622e3bf1dae64ed82f6" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e6d9fcddb82a096160b20625437a0f73" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1536fe5604e346048b6bb26f78cec79a" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "d458dc28eff767b0722d7cae8ab5e237" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "450c3600ea4d068b8b30307c8dce9f63" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "15fb8be7e986200e7118f870202bab4a" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e2585c96ff725b0ca96a68620b56b82d" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "03f20e375d326ab178cbf4b2f98d7b8c" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bbec400a4010b7993f09691a7b22effb" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "041f2751ebee3699d6b3e5b5bbebd3b8" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6d0262df1d4ae1def9db467652d516b3" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "21e43f7a6f3896191525f04be4dccb5a" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "21dbcad32513f4a7945be531e25aadcc" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "28894f31d17ae6a9b215c5902565c62f" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "390e476a81a49f64df21b458f6c7fc33" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "612e281faced9d6a5b9aabac94fd1e2d" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0a9a27b3fc39b5c877c96c453b0d196c" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e8841fc6785fc994ffd05eecf00d13c1" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c7180874f4e706ff05c8e9bb4726dbfd" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "680d2fd473481c8fb0e809ac93b1e9c7" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3c893b77caba64657f295b5eaee6cfed" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7484055d86afe2b074f04e84da3ec6df" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "df34d81702ec731e92746ce1820de897" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "b4d5b4df753f68dcaa8ec9843316d975" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "da5216360a72df46cd6155c73337a747" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7c2d5da4bbe7eda742ddb2f573fa2c40" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ee64a474e589e941353dcf87045ba07e" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aa01a4c73ba55e5d433c5d25266e2f01" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f0184d199be0635cab5c751e213e2495" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "73416d37286691292aaa3d12e11495d1" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4b1e7202a87565f5364816000aa51a9b" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1dd7c213f5ea778e97576ac20d5f4565" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2fb0f31511cbdbe910c5f7610b79c267" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c24156e537a7ebb93886b5c51e3acc49" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25477120 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26787840 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27836416 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "9773e2b9ed4bb2ea0eb3a1470d9e8a79" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fe0019cf2e681e74f4b19f0ec02c9530" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "73c350a56c789b8f8ed25e6d4cd8ae66" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c84f8766b72133950035b954c9080280" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "335cf6a5cac9bdb84193f14ee985459f" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7dd0bc02024572dec83f5802e2620af4" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cb2f71ea2e737278cb3ea2bf2f95e85d" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "09f565f18f053bf0508dfa7dee5983a2" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "45528089266e197dd814a8a09f805d85" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "1043dd27a358462a1158316bf574dcca" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "560d68a35d4e367f257e8f5d24872123" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ae76d6ff5454fded94080a864211a7c6" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a99188e957ac7ba56eda367505ca0fe7" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b6ea0db7f7bc8aa1c06219322f493514" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9f895ef8bc4d3a5dd932b28e6b501324" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fd1552a80501ed75524ca7c6233d8421" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c2910a28e30e90f2a901a3103968a166" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fb502e189cb7b3622e2bd51d67182c31" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "72e8a483859990d3a1c841b5c6b08aa4" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "996d6e0683540c286b4bf901e9f65742" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "55b10904b2d1819a6315f99260750dd9" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "44b7a467c94cc61675b919304c64b1dc" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c00796fbfabe7fe5a1ccaef618a43844" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d577415c5f064c0b73d42f9684c7a4b5" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "77d7ac992f44fd96a6ada9f812084ad1" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "548df139908140d022abc4549ab93c37" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0f3ce088e5729275dfd73fb191b98034" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "62352c6e2a23e9661cb3a734130ea9ea" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e63b5c7add250333122ae259d292c679" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "d31518222d559c7c411fb4491eceddc2" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bccb227f26b7a09aab7c9ab228954d48" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b953c506c0b7f41499d20ea1a72a7ae2" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8e2d92f8f5eab5b23a5ec4598a67817d" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c5ad691c9ef5c233f0591ccd46ef1e06" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 12075008, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 } ], "md5sum": "3a2171ee6d2fea47e190c9ec3e8d99f3" } ] }