{ "metadata": { "ParamSize": 805, "ParamBytes": 36381212672.0, "BitsPerParam": 3.4891900844756822 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "lm_head.q_weight", "shape": [ 1024, 128256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "3b0091e9675cc30ddb562de5cb1cab5e" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "be56f00c423df3b0695eab97d8f5c83a" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4478a8348569679ca051121053428673" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "56b93512f25b610d2c68c3db50b25f22" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dce9b5168708d4d7f6bd1ac7489117f4" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31145984, "records": [ { "name": "lm_head.q_scale", "shape": [ 64, 128256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16416768, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16416768 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 16433152 }, { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 20103168 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27443200 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27459584 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27475968 } ], "md5sum": "f6d4a2a6f5b3f4c56ac1a425748319ac" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c653a566e9cbac7000648582ba3e79d4" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9dbd53596bd713459158086e6d0dc6d0" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "3ed7d852262933b734b9bba319aebb07" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f071646732491b3c1fa353ae5bfee3ff" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f1072e72e9569e019627fe0d4f8a99ff" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29835264, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16416768, "byteOffset": 9732096 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26148864 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26165248 } ], "md5sum": "f90c8198d41feda54a54f15df0cea400" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "95ea54424c5c7a89f4b8425b0a026ecc" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0888ed7f1121243b2c9f94d94396920f" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8ba60af38072934d423d014d235abbca" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cebe5d43f63630cc878a4d957aab43fd" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d04c603a0303c5b831d7a2bf90ecfc80" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9e1e3c38d32c8dd420201537cad85cd7" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "32f05c224733ec5fdb84a6b454c23d40" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "06a51f0ca8fb8db38b96223c0ba6c96d" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "5bf0386539edca7d59676de280761484" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d3c98fea786141a2f439cb9980594aac" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "06a157e7db7baece876381052459bf3a" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5183ab98936fccd317217e003c6fc3b2" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0595a21332399b7c5b2c204036e3c333" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "542f7a1fdba0c97a4360dac7fcef7469" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4ba62c64e7b1e0fd72f8bee6571fd965" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8ab88a0ebb486e27d2c1eb3d1008fec7" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "852264770a9bf03b4f20fe2db33292c6" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a430790f2fd7026b49aa0b8086a95242" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "934c7678da36bc800cacf06ad4b0ec4a" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "c6430382eca1fcd18b3b762e46df91b4" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "73c670f32bfc0c4521c6bf2235704b47" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ea9aec136bf042b02bb2208af535bedb" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6dc67927ca7bce03aa61911302c4819b" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "829c3dcd8baab3440f7f3213bd8aec69" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d2db7770f6c414cb9a43c4ae7e8eb41c" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e8153e8e85b753a42f317065f0d179a9" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4893b007a838f28492137b86d152f4a6" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f2659ad43c339b30a5efbaa8e4aa2e59" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fa7a569f9350eec26a7b1327f8353a07" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "37f82b2bd128f144537ccba3850446b2" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 32833536, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 23117824 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 30457856 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31768576 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32817152 } ], "md5sum": "51ba6e6d64e65d831c50bd2afbd805a1" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "31d6eee9944daca913d8c76209e639df" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0149f3350dfbbb1527c87ae7c104a5dd" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "65dc6f40cde595f2c4228ed95f420cb4" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6350be185a18cf91a96600b1cd77e1df" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2ed3ae06f60e30f74e9dce709e2fe68b" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6671a01e018565f35fc9ea3f7148da0c" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f7777619655728bb608f7abdf089a4d0" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 28147712, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11042816 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11059200 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11075584 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14745600 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22085632 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 22102016 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 23412736 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24461312 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24477696 } ], "md5sum": "009efc2ebb07fd43b345ed177e300c14" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7d31837d6957f68075c141ef0e1356a6" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "39b55e353a5680b469eb7a25f902453d" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "91f4f475bc21ae417dc257907d8ed7e1" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e1927e37b645fcd58b8c0b0882f86da5" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1e1207762e36011c4b26caef56a2c919" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2cccdd4ff36a0ad8ebec04a75d084146" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "da78414bab7e35ce985c531b629caf56" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f70d68b84017245d7b69a9fd9b2faffb" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "14e6b7fd97c200d3936a9b666f8644f1" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4e9967bb96cd5e4226a994188ed49961" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "db104906924c9db7ad3234e72f5f1a74" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "011cd7c32db966896f5090f5825b7ef3" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1ff37d82d6ee0b89aba1fa1f95c998c7" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5a6178e087a2b8ed8738adc67543a63a" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9a81d92ce109bc288765de757863a84a" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "efdf5163be0387c193f8ea94426e573d" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cb491ec82e4bfc4209fe971cbe8e34d3" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5c4417c465e28b155946239fe370457f" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "9ef7bc8cfd4dd68031e444fcb82371ae" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4e37404d5366ff289a72ba3c79dfaf58" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9789578c1ebf2e3ea686f86a9e0240e6" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "94d97020058893124908fa5134ec4381" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5ee50d7c6265fb78507839219279a5a6" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "164961fcce10045f3cf1324433c77ae0" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "930a2a55b71cd6b533b31d6aedf0f664" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5026b1b3df1cf1660f74331cbd28c8f6" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8f6400866e8e0ea8c695aa674e0188ee" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 32817152, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 25477120 } ], "md5sum": "5695dadd02a30fe487b62d83555d9f0d" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cc8f23fdd12598feb8cc825d7d3a1f11" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "431a050ff559042d93b6511507325177" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "73390833fde31e09f118729ebfe6ab98" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f2734a6cd2ba9c5329302764184d0d3a" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1439c4292b98729024cf07a0b1e26c4c" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b096e3e976fd20a5e98fc44a17159132" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "93129b99020392932a0b1a3b3941b081" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "677598e8dfe035a700a542a55136d4e0" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "be79c28b3351d0483973b307986b9adf" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "42da9994f83d506eb216fae7c3001532" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 29196288, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1310720 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2408448 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6078464 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13418496 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 13434880 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14745600 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15794176 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 15810560 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 19480576 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26820608 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26836992 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28147712 } ], "md5sum": "72418166e8f748918a70a4bb9d69edaf" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "80ed3c6c70748f0f7ba22265542afb0a" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "348d34f16759a62612a8c5f4a78922a4" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fb66d8446a6c39246597241c30f88fe5" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "77d716644efcb4b9d4f4f3ffff4945d3" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "64716031b8186e5155a4a12d8a3b68e0" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "282a0834aa7f467f9829fecba86035bb" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "14ffcf995d9d46e917b5c1eb01a7f8f6" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "75f2cc197f423a2e28d51e10341fde35" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f860cab5545acadd721a3b2010c1ac66" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9715712 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13402112 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13418496 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 17088512 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 24444928 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25755648 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26804224 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26820608 } ], "md5sum": "2e78343614ed5433dae3420aebb990a6" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4480b9fd23304d5c8488427263a3b228" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9de189f385118b72eb450958ef50ae65" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e7e7fe3f8c9d83741e424a864dddfb89" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "deb60485235a4994591255aaa95981ba" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bee0337ef8f9da7483c6c7ff6c015700" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "46e3d4e233e8956ef149c2c1beee43b5" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ad137a7bc2327fe205fd451cb0e40093" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3c8a6c4cd1a2b0b9008903a58663b846" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "cbe6e8eeea767f283471189912540a58" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "820073a3b296a0f73f76992b0b51e649" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "74063931550fd1584781d85e166af512" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "59379a514ad24897d25418f2d1360fd5" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ade9db29f26824c777acba7805dd42b9" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8621e35e3317402857b5f5322fde94aa" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4c11cfa368426fb3a60b6636b34fe10a" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ca11fbba1aee1c2dbd9668a06d91bc1a" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "dcaef57a9944b7e6c6352b0038cf16fa" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2a54a2eaa7debea816220a644e7edce3" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "41db23e74eebcc795df0fbd42fcc7a81" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b0548d208dd49259b9971a164c5fafa4" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "14dc965997b9c0cdd82358dc6765e9cc" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8e62a070b0b6d8936e3b9f6587484866" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "58f555999cf2a541561cdc8aba1732e4" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1876766cf0c65a6d76aa3102c4eae54b" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "57234ec1aaf243adc00ccb3b46761b99" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "887ff2ed5fa38d5d5152d59d573d1573" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a184db5ba04fe9410e9d4c1f6c82d125" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 32817152, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 25477120 } ], "md5sum": "fe3e53ab33709462447d0727acc8c1dd" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f10dd3aea4f80110c6ee5a8cd0b9cd14" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6c33918033ef6614d943cc897ae5f061" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1d89268527cdde58ab54adccb8a1b2b5" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5cfeabc2e3e1c10d0055410ecba5f79c" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "13c529d9f9c12592361abca7d961e2d5" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0348047d62218723d2081d0be58006a7" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "21f6477a425174d783c4c6731a7416df" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "93e24a5ec625824875d9d54d3a248228" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "22aed74de5539f45f7b1905a719ce2ee" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3fa3532d8a5ceb3c94e7d2046b966fb5" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 29196288, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1310720 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2408448 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6078464 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13418496 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 13434880 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14745600 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15794176 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 15810560 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 19480576 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26820608 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26836992 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28147712 } ], "md5sum": "3bb9ca7f459745574946a93e2e513c45" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2ce831f614f04896be708cb03032ee40" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0d862bfbb37aa4cea12e0dd4fe9fd313" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2e08b146f5f788b588c7bcd0e4cd88e2" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "182973e3cd3b763cfc4c0291b520282e" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "25c9e180f5ef120857c370240db90156" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "243ad6aa08b6573f3e1379588a294639" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "527868f48a66bece3d5d6470799eab4e" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "615c19403eae0ec9a55ac90848cb7a76" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a5c8ba457d2b961b4a94c71651801776" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9715712 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13402112 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13418496 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 17088512 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 24444928 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25755648 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26804224 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26820608 } ], "md5sum": "4a51dc1c2ee39bb9669c4e1261ed46b6" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "69e20a9a0d9c53061a72b73d12fe3831" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "46b54148e38c1ece24b789c928021b16" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "abb1f80c7c01237cfe819bc18302569e" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7e8c77dac7f8dd41685aedac58eef383" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2c31caf801dbaee28d92238dedf771bb" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "64928d45083774a5af2e176b07290c08" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f9c7624248dbac6c1fcb58aea5528d47" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "34d05c67b3091498883eefd6c67fbdcd" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "30ccd877b4796fd264698fc8c7169fb4" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b7fd1eff91aea059e79496764482a61d" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "632df46c64efe7e51aaa41d7daba0d79" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5089efd3781793492aab08101aa07c10" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c29809ab709bd74cb8d454b2cece92ee" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "34d71d974d30dda83e7772ee5471597b" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ddb37e3db949b258fb9e3e6572780a27" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d1d9d8dca48b60cdb85731784544ee83" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "01aa5a99364b8d5894964d8fe36234a6" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "273fb95dea3e4848307148cf5044c20d" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "3f0e6f834f88ccf775e27be94295645f" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c8b79b1e66187668bed099be7099d1cb" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "777491fb5bfb16036ae91b5a4ebef130" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9be97d51d5fb7d2fe3829569a2ad924b" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0f55e3e98a33b33db3838e90f32816cf" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4ea7d81e1f6e740ecb15021183e938c2" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c25ce33d3ac4d0ffd7fdc8c7ecfde824" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8dcd987154baf55a748396c54480bdaa" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "19e1ee9456987e4cda2c6ba5c007f2c8" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "1407d5ae777a9c246d516da850da6f7a" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8ae6907a8cf89121f68693e3b5a63f21" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ba1d0d9b0c9dc38631524cc42dd0a8c4" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4affed2de7a8c92cb837cf08b442bdfb" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8289ea17d79f7fcedf2fb68b63119f46" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "19fdd42368ea9ffe0b5e5ba09c54bc81" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "99e7e7c5be722517dd0c57302c309c4d" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5603632441671b36c9a63c59d70e46e1" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1969fc27cdfb4c93b505287317ae6137" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b6720568cf479fc9e0d12ae2aba56935" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "07afda7e70c193ae54fb045890bc5a4a" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "6c311af7d5ce5c42338345e86f454940" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0f2390981da40ee17ad0c304c54c39f5" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "42c1cd65568ccef72b282794346f1072" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ebf3e71800f609d13e4f19c8fdd45ec4" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c772735c1425e1160f5443f186e3f98c" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "030644a9dad18b623364715ff9fa11d6" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f4bcea1e59f974535406c66afd538930" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1a4b45782b1bf9fb3113fe8e28fffd45" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9f880021cce2fb846475d1e7a656035e" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "6f91bcf618e72719d41e352fc32f252d" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3609ef8a95160047e47d62550461a2a3" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "279b7be92755bb281989f28fab742047" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2262d7408b546fd6d5141ab4faf22c65" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d6e255341b24ddc9d1759ad76b4c42a6" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "21d35a869de8054f852a661010da9612" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9b5255888c6d7effa017ab90900e005d" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e084b24048946b16667976b99bdee058" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8afac5754347eae8def7807680b077ca" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "663223a10337d35a8e019c5c40d5fde4" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0eafe3652c9e1f354037d5b5d7f25184" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5be97b9b9d4b18872e959afae821079f" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d15f2eeee7ae5bfcaceb65e952ab51ea" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f92ef9197e16a08ba008fe9c5129798b" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "79fb7c7e64e8c5b9cd6c888af8ad381a" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d5d6c9cc2c6e719200178f8f21eb9be3" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9c471cd81096f026b544ffca8b075dd8" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d1223f3172803f705f9c9cfbe82485b9" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "65b929a8e516cbc1511c4a6bfd806ad9" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "14580b4d64927ae4fdd3b5f8b50e8889" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "5c58d9390e1d4858d7570911aa0db804" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "eca2ef1c7b0e444c99b07cc36050602f" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cb705bdba02d01704fb614aa32b2bb38" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "96ad1e4907252fe163c600ef791e723a" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2114c4292828df499a0159f166588dca" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bd24ad6d7395d76362716f979a5b48fa" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "75db6ac509b7d9ddc43ce63f8fdd0b33" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "016bc4a3304a8cd34ca5ab297defa696" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "41d4d7bed32becae70b36315a443e31a" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "5d61eeb2a2dbadeb879c8548bda4ff7b" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c01a55683acbf6f15e7c0b3929af9bae" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "de2ccc63968b9564a20173c314197819" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9e1d0b561dc0bf82a54220655e9aa592" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "54711db97eeb40358dd63f3825551b06" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "59f7f003aa0744e6669117109a812d28" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "868d2c3b7ea97da6e9d648155b4bc69f" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "27a9c162ff397380e192a8343ec9374a" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6e9e8b9e3e29f38af153cc335e6dc947" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "02ff856d99434b76a853cc73478a4c48" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2ae29f87e9d9c3187867e6011f550c96" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "577e75ef42e9895879103627b6e59569" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5a50c2da8466d4f489652a7c14f81119" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0f5ca0a8c67158b7ba8690d03558ed1b" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4e707c9066d278882227c601ffec7be9" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "75c4bcbc5598bc0ca99cd2b25ae266d1" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "330c1759f96dbd89eb91a4b46435a8b9" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7c5feecb3d0c3b30a35df361caf052de" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ca640ddfb7780978dd56ccb748ad087a" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f731f4f07ff38e2ba2a49d840f67d02e" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "330cc1c93bfee923a2e88f5c68a5501d" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "99a911d9e99864e2112bd5bb2bbdd19c" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ead61be16475f36814978181e51bb030" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8a1fbfa9f095c9efafb9c08e2919635b" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "24acaddfb91d4d9e74f7bf2bd4edace7" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f6644ba7fba7c575e2332b2910ec0a5f" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fbec254bcfafb4a22b230e14465f55f0" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a7490365bca18bd717987e5517df436e" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fca77f1911f6cba5136d18caa2fef548" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "43dc193856d323fdbf266a38c5779922" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "974d58217cdf72a2c91b64446733b325" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3387563bbe45c6be27975ad29def2f1b" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "461839505502c7881d7da79512bb8051" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9f095a3501e07a91c8289cd60e95bc23" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3056122712a9e98bd07f226320cd0ba8" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "17e46ef0062d108e06a864f616a2c3aa" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bb75dc4c14cde443e6debad84382c04e" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "54329f61ad47b31a0b7a9a81b57a47bc" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "21392cf5420d3688a46b0927e797c470" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8f7a8f9e05406bd12565e33e3b0e0e96" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "c1b2e40578bc6042b7d023c56d8a5632" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "455cdd15aaaf909d319b2332e5ac01bb" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b555cac78f362b83f2955b1c91acda13" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "62d775ed72e59fe3a10e5195da6bd512" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f937e8d2b6305a0d9802099dedca9728" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c636517d8768268e583f243a8a29d09b" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "01facfd13b0b7f6594c97c88576ed25d" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "614148f4d2e7ff69587a8d59e7f4e2c4" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "002c17ae124cfeaaea821689ee3a4f1c" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "62966aa2b64023f68363749a2c513464" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0f92ac9205729489247fb07340aace96" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5d6f856c59ff079f1ee34add0de3e67d" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "97d895a505843509fae065a1d009678b" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b74aaefa562355bc2bfafedbbf0ef6ae" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d201407c61d540d89e2f9efaf9baf0da" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0e124bcd7ebda229c9096eb81266a12b" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "347cdaf3db0a34a79cc8cbac16ad2f12" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9aef1782bb8e5988ffb3c20665112a64" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cfc16648f202ee46262772795c40e678" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "376643fd129e576b2188c51090bc9fa0" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "83aa3ef04a09aa01336db05f112e7f61" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ad54bde685b2da77bbfb037dfa5e4914" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3898c37583b477474612f15fc5b99081" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f28b2a7e2b01dc4d87f2c954eccbccca" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "18b457f196e38a7ab0bf752ca0328763" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a476f5ddaa2cd9b6aa1f1b150db5e847" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "10b61940d19aca36e28269d1f890d461" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f5de856bc2e9c87f505428588fc1443b" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dc2031a03433ea5c03a06172b995bd18" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "33ec8db5e6d881993612a125dd851df9" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cea6639f3428190c4a039b2d078fa87f" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "502585ec5e7ca20ef1727c835d40ba83" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f304cbfab1f9f824ce17ecae669fcb23" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7375b61c4ea7cf7802bdf215d34f3e23" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b0483199c9b4013c5d9c52ac48c79fe4" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2cb5a1e7df3a88545b6be7dcaa2d4166" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "da47d56092c6f118b4b3d393bd6e8328" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9cda5c50ad22b9159e9ef3b2f336ad2e" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "60a37d160cf0222635c3f2b5ab40c4a3" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8c644e70185676b42ae8cb6ef8f191b9" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "22866b1f21bbc156691e02f3ea9989c2" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d72cfd9e9d541f5f24218fb753f48f25" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c80fb10606f530a7ab1606317bd9ae0f" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e88d60d923855df24a71ef7e5aadc7ff" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8771096b7586c86515d98c36d2f1a966" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "01f85b64c7bb79f7c1056b27b80ed53a" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d83f35dd24c463b14c23c285af255d19" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c70b9f741f2fb6ca5302398f220d1649" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9d0cfc8e5a2c95d39a6cd70c13242d27" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "a6877ed82ed9c968276a136b6e378859" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e343cc99e4836e3fbbacf6556887076b" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f9f9d4ef4a016398997488ab821b13f8" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cb75286bf1aaa8372cb7f2b89aa36e6b" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9c13c0f00ac62c298118b39b6fdb84bd" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "209becb42877a79b6a0d51983ae5064f" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ed4bb5a6f09424c9ffda78abdd16f3c4" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6ef2af93afd89be6a09cbdc23f2e3143" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d45f467ef38e313da8d61512088eb729" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "28288364a5a12c9f02a303278bc84c0f" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "68e19189d9232b9f041bc581ec68b0a5" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25477120 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26787840 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27836416 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "ae8965ae4e77a6b282ffdfbb5c41786e" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "27c0fedb0e17e26b9be8e127b588b89c" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a9a474ce035753e67d6557a5bc7ca207" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "679ce937c544dc6a6be3ccf13b22e7de" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1be261207bdc1993e1e59f1ca416e347" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7acd59ac92dff12ad89705be643a8734" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "15aec20d9f7a52c6395631a8a2157d16" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1f4fe57a25173dde5790e288976cf5da" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "817d9fd51f1349d90b7bd1e896518403" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "267a2940a282cee7b97afbf50f523ef2" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9f0d72234c0eda43ac969f1916277aea" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a02c2c9a204b338e8b4ca06716ef1f21" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "226aa134c762243f65dac492574bd5f2" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e04364b7107ca3f637367a3a433ea82a" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "192a4fc4e93f2fa8fbdead2181b60cc3" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4146daf127622c5203528f7d7eb2b0c8" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "728d4f6fa381506740311c0e652fcd38" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2c3a00cef5e3998ec483e4d31e880a57" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "55a05edfcb0264d0007026b0b6eec428" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d9ed65ae2697b361c805f3cd6c2b13cb" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "b699643b2520ef6b115b0e6dd129f5a1" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c72a3ee63224b4eb34bfe3485b6db29a" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "81161a0da23723c17d2c2a7866daa899" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d4ac69960637e5343bfd6398c42d5edd" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "22b2b394e0c1f77e4bb3bee4db94fb23" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1245f194787660c939e06d20ed222a7e" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b7fe10f14840df968251131693d3e104" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0d589884b18c58bb00c05dcf73aa82d7" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dedf357090aef5a98c35fdef9c4a8818" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "58ec1be3855a545fcfcd4ccac65fa43f" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d7f2388bce4f1c3bdd4dd110980b8326" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "85bca053d1fd892872f3daf5fbdd0c16" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b5a152a305b212fd71d88efa0d378cab" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c7effcf6de4bc739855d76b26e152423" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 12075008, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 } ], "md5sum": "c5e9da5090e20220c4c20ae36ddedb1e" } ] }