{ "metadata": { "ParamSize": 805, "ParamBytes": 39688355840.0, "BitsPerParam": 4.356224340918386 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "859b21204aaee33211f44f42d0f5d238" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 65667072, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65667072, "byteOffset": 0 } ], "md5sum": "da74ce31b8e16e8dcf93f456eb245186" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "6d7e2bed74cb2dd3f46d886b56a26409" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 65667072, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65667072, "byteOffset": 0 } ], "md5sum": "fbeae3e417d0d91bbaf7467ca7f30753" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c6c2adea9b78bd2c97955d553b836bde" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "07fc20e9a649bc6a5a9cccf5cfae7071" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "99549b25b2b47e3b7d486a6d3f6d49c0" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7c1a0f840fe2fb2f331315426e1805ce" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7a5092596aecadce95e96b84af6a84e2" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "177b5364b6c1148d8c47c74ccfb74938" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0883e4680fb2714110c121cc3d7fd064" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b2034739a2cd67724c24e84b03fd2213" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4a6baf26e6d50348d82ae8c731d7b5b2" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 16384 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "f221d99ffa08849cf5aae88d72fd0ab0" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d7ca3d43734a765ce7a55f52cffc659d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b1da493ac58ea1d676871eff0cd7c97c" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "d20664a6015902f35593565c4b305a0a" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c132a5981a154589545244830debd510" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9e622158ac23a5bab322320341ee1c23" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3bc2f4f6d0ef46469204094b3eeebd70" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6e81522ce00bccdd0c1f82b76d8d6571" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0e8f7d098ea8a207b8c0462ce459ab6d" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "b2d6bee5780320497301b1861436edd9" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "968563fdf7b3438cb4e35eb63bec850b" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ef77895a02f6e73e43872d2ee8d624ae" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cd663299d2c8e915e6594b26a4a0b924" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "18a2b5fb462db1f0a833630a52b34f55" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7dc3f4b1847b7f5262fcdbb229c742d9" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "67448e5d3d644f4ea81c5dc886a5cf34" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "8dcc013845b580f88bf6bc883dfea148" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c8f62f4f5b5f35ec85d9f8e3746d06ae" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "eb0131d66da4cf816f50ecf4c715bca4" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "766482179e07e003917fef159f6c0dad" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ee10a5e9fd737e741f5ee455cd15aeed" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e79cbe42220119deccf97b56b92e5f44" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "60a4ddff7144f3e108eee6967d3e752d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 18907136 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "9a7bbd45de5392399a1098ec68c0ca85" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ae8c4b4d2c39f8e1de8459d85d11b93b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "974256606672403099bf21a41e55afb1" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fe071776313e7c0d1e4ce35f5f0b77b2" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "69911fc6044ddd346d5f5b917bd6d767" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ca2c87d2307906a1b17a32b525e53881" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4d8def63551d6d97c7eaa193aaa380b6" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f53e3e349418262f5d86b8bd60a1654e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8b2197fcbdbddb3ceb0660eba3d596c3" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "87ca1f13e71ae621702ce1c789e28cbf" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "afbe4f842ab17ddf06e674db8f500cd1" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f65944cba92912f0dd15a07b0a7f6fb0" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "cd4edd3d058f9d6f4d01270ffd24d54a" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1dfe0a52b13c6e1dbe0777ff5d9e6c95" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e6b34804bcfa42fbb52ab21e5108f981" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2b6104de3f7f79c39c4f5c41aa64ec88" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "725ce98cd9dd589ac139f50af9969033" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "70dda49ab0b689750da4b23b5b2fd233" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "6909e6523bf0c4922b6b079600037136" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4a455ecc90d142d0ea6efd180fa03408" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1557209e42d953d53569d478624b5614" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a4763a53804942280fd223d14b9e7375" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4a26d9162dd3e19e31e3b9f7ffe0d798" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5280af46f277d6bd25a7e27ffcd27514" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d114475e3c63cbcd93fa446359482b22" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "199a858b4f0b6500364b4c3de0d624e5" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3fdd0895165e580b758eb558da64b30e" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "af0fc2c2efc810abc9ce900ec27e4802" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d9ed1c8a1595ee516ae4cf179fd69a35" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1d379692c42a9b6ec7b673f7e993993e" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "b2788b7fa9439cfe588035623e1b0554" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7f4f342d9e0f428c85fc7c8107807a81" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ef7c1f9c222bdfcc16ba63be89a5d8a0" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b2a30a3a7203cc016ed38df6e6fbc25b" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "345477c2d9b01ec93bf49f759ffd50b8" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bca5a939732a0569fb39de212a7b5491" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "d7bc343f6c373d82902b89750d4bac8e" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5e1be49ceca30d1fba0afcec19ddeec7" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "842b897c8ef78a44fee073f721aa4cd4" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9f99a16ed4f1517a53833db729ebc428" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b9a93cf52ac7c0f282474aeb605f7460" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5a3d12092268ece9372dc1dad485780b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "963306be404607e6db1905ef0b701303" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "1ec5ba75b19e95009ee8dc4f2e6caa18" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "368df6ee87abe97369e1c896934765c4" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "449f60f48f18b75a1d452154d8b7e561" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ad8424a2c3f7eb7496646f848ad6a355" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ad18a8f1fc739a2fed976dea278ade72" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "f31cd6b485be2cf973cddb1d984174b2" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fcd226ef00e1986162208ecc099388ea" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "620628a1c660d872776d8391309f8e3d" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1ccc902f4d00c2a06987c4cbc27bbbab" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b968956c5018535e386e4638651a84d3" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ae77e11fce7ce2dddf408070c47dfb54" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "e3b04c2bb3141de5d659e647696f066f" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "75f564d386dbc85168ef3b1297cba8b4" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0a6f11593aa44110a69f14de20274afb" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "06fd7533c05cf779142a7d8f55b7441a" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bb3f580dc7a2958feedc1374c58622e3" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5949e869a4d9fba77f2b2c18047a9e23" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "56e5d5751bd901b47a6f79e5546e20ea" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "f8fc9fab425927abc276ac49d5d15a94" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "605f3e34d45da2c427b5f9211d335220" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d42c36ebb8e5d6680b3c07110537c5bd" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6e07e6b42e304d23873540622972cdbd" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "15f56c7edcaf026ec3586d7406554353" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "c2e67594fe7488a3164fc8e114132234" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "abb7b7f9ea8d97c9a1f9aee9fee1d333" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2a28b1442a0faa39b47aeb5f7ea4e5cf" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "adc85505b70bbb6425e70db445fb0acf" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dce2221ae4b3ae416dae851f5b7695c3" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a4ecf6c73874e003eaf3264f354ffa7b" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "fb0cb52439364ee2a2afde4ad8442913" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "35f84ec9f9fc8497b5e5c14eb1a8f895" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "da0eddee708eea9e353cdd4f1c03adf3" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5deca5c5585dcd4c3689b4d22d0dc20d" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ee12492635e656a68a9e96c44fe0f674" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "775bf6ed4a86690f5f4032745c0229f2" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "38bb42161c33da6b440c42cdacf49af4" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "30651ac3920af4bb541f7a505f56f3ae" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8fceb7dbd994fe1a6f45449380c80ad8" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1d7122c665ea01f85ff7652aab72aea2" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c47e2e9178cc9108f576faad7a79f035" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c601bc3a21999a5d5f5190de97851359" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "2cb2ba253a284b0ee51260db29b67bde" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5192c0fe72cb2f3625edfaba3c1fa5fc" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f3487cac9117c92d5c8321dab2eb81ea" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "05a59e2cfaed204563f4ab0a6ddc8245" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a31403356e04b673394aa3df335f55bc" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "74a897a0a0eec0febd137becae2687b6" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "22287387727d5506a1b84496b83019e2" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b936f7310dca17b785f2f987da36f14e" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1e434a549d0cc540b447f5e8f27dc16b" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "632bbc2e95a5f7b8a1395cc2dcb64b48" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4fdddec7d766639dfbf74b652d16beb4" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6d281132b07856661d7838a6c8628d58" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "c45aa0e0ec14e1a2971cd7ff35b8cd57" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "212c3d21b5d505ca17d3129aac781233" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5a6892ca311c57846bb38258d0204ceb" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1d7f7cfc87b3185cfc4da44cd6263a66" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2593c61ef6f35be1c77775f7743fd059" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "84d17c53b78cb7ff6d23a3ed8d67fe97" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "e8ddfc80d18dabdb282208572634eeaf" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a8f411d6c038ad1db5ac0107b3e92205" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5ca98794733cecb11c342742abc98f94" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "789486f3d79c226b66245f4fdcf155dd" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7bba8424fd08f6a07cb681060bed487f" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0276fc07de4c2d1d341f2683dd2f7de2" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "56f326ac73a4d85ef721868b672fe423" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "50c354c6382e8e879cabd7d6c1265750" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "eaf5bafa8ad0333c0edbaaf13e4dcfdc" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "cac4d8f44e1f27d9b86f3b91971dc6f2" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a75270a3c935e1bbb1e7599aa9b3f17b" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "84cf1c86a51b91842759c7327bae730e" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "96ffaa6bbd935037066dfb56b1ee8f45" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2574c5f8d7033ab8017c3bd0d4695098" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "24dd4a1e2eeb7837ca01e901b4b23cb7" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a7c17388f4ecd9ecef3f943826c90c9f" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c92090b662c905c33e14262e81c17dfe" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "450f60efae95516b5f1a5491ea2f5272" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "f70d65389ecd8ef65403fd9f327d5b89" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "508c4c81cc3edb4a46b5fd5bd05c3ae0" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3f95031fb5e33e76907fdca8ca202653" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b8891929dd3f2b947a30c95d21a70ead" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fa5d55a64b74ceb87755da4cd502cc70" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "76995c0baa0b9bbae72ab9dee1b688b9" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b44c86b54e1f2ad20fe6bf9f5310257d" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "87749e50f4a15e740564443bc7206226" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2346bc93311732ba463048832d7fb14f" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7dc057a23d0e256c1225bdf79942dfb8" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ca22d9c8189f3b2a234cd1040b81d1d3" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2bf9efa4ff741f3c1668b0bbd7dce64d" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "4da3db2c1849786b3800be563983eadc" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "03e0ab08b74532d47de0ab8ac96af7cc" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "844d38679bbd7264e197b2f668a30d40" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "75c41db938052d55a768b8e4f5e82e62" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5e82f5eee4007c041e310697a200bc21" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b3fd49da5eb5dfd8c402ec8b641e3620" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "9dc19ad6d84ec92885e7aa5628cae593" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "01905ef7ea500af8d3b1c433c5e278b1" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a0f2f3527e41cdb687f593c996a0902f" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b1fc26d3cda131dc68abef5619f63fa2" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d97436874e0d61caa84f6125a49541db" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5bc0b6e50c8108f739421bc193ede759" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "323a7e350b123741120160c10f000923" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "760176dd10a0cbf82220ab30aa487c9f" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "42778a8ff2819123a34c821cbad53c36" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0432c8b76a24d41a241f8690514d4e54" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6fc8638071d8e8d180940e3c8df57ae4" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4f2e548248e6662ff2091a0dc39224d2" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "84f744a18b9a56aaa918283bedfd20de" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7fcce34450e2dd6eeae20d572c1277d3" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "defac3a22c6bbea92d7bdcecec0e7c16" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e033c31aaf820ae501086fffac6dfe2d" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "70c553b4b1ebd954ee1427d750bbddf4" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "de6e7dded1820a2a0fe4966ba5afc6e5" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "0252d1abe63b1d2b4557e8ed72722bad" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "793b5a2a9ad432e77fd30d6c657a9795" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "69d850974d25b4b7344a014bf1dde45c" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "10742e89f3ae3f9574ac1d9840339d6e" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "25c91f2287c60a0695a70e8e92381531" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c001601fd9bf61da1f2a6991d7ec167b" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "f6dd88cfe588ca8ad084fa3d285add3f" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d4816b777836c3db1ea00e1b45affd2b" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "987118b87987af9315f17bae722aebd8" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "23c05d697f872a38effc2683d62b4524" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1515d8ce24a06247cc8cf464ccec28d6" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "62a9ac49ea28490efcae688a5eb25f11" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "a4352a84ef276c065f0f212f8a202b1d" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "250b38a228bed2d3fb41789c4ebbab92" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "86d72b37795edc582218e642d3e7444c" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bc450c918e9860c8e29bed68aa3d13c0" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2335562432dbf5aafb8e3f378661f2ca" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "507c0c21a351ea4c7f410532005840d3" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "971afdb4e043fe92e23121586daddd73" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9499d99ca4505458a5efb996b22998b3" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "69856eab4c08c543ebf42fb8d38cf0db" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "660de9631f129f3650dc5b057931a145" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bd66fc91828c24f3f43b65c619ce6b34" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d99cf8fa93dba9d30194441cd3fecba1" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9eb707f696e4defab154124328148c5d" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bfcb6e97598865988f6bc0eabd27c3a0" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "a0327abd537138d7b7501038bbbac11b" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c1564984fa6b1e8beb84c8fc289e76ac" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "36005409ec4a1c7056fa692784de4cef" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7d6cc35c90496657b7c080cd0caf2427" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e8603736c0ac94e41b755028d29c3cd2" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4a58cee836f713e4ac015924a40431b3" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "da6a10505a118443465970c9f553e859" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ba6e560de23a515920f107280a679c62" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dd4be3707bff5158c04076d2e14ff7c7" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4609a55a4027d777c2cd2c2986d36faa" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d8352320ca1fbe24c5172d33c03ef8d1" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0411e2d1e400d5c8a0489f230bc57f79" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2b5fe15b19e254fdbcb422e415763f3a" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "aa8c39e53a6362a96941f6bbd0405553" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "86ac1dc80f551618d940c6b6081559fa" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "43f47df31f599a8a6cb64fe7fcc83af7" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b1418734272f95272c5465119652bd66" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f1f33330416b5da08eb67701f0a5fc6f" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 29409280, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 14712832 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29392896 } ], "md5sum": "36e7599a259aa948e51535bdbdedc81b" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "be9fb36598ea44727764b230733f95f0" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "734e1951594ebd3474b32751f2082826" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d37d21e78b581770f301252bed873f37" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "480b6b3f7d2d3c72c48a008d5a3ca0f5" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7a8110a164146f33e9faa70e689a5ea1" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "399758c33cbcc2a233c1975b4d845069" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "5f54548234b82b882a2fc2eeef77a25e" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "84b976f1c530191d0c960b2d25d7c111" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "40a10eead770b9ac914e870a85c8f8ee" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "94ba05988131d360cc1bfde14784fb0b" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "93605aca07152de86fd157c1bf7ab0a4" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "16ba0ffabfe0ce7e761913a4f12d5f52" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "66224de41405fb6c3fd605bbf85c09a0" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 9437184 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13631488 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 13647872 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28327936 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "276d129b50512ef2ea5a2689ffe3dfe1" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "196d92d6fadb7ea2b02c11a56713d5bb" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0bcc7f20f3c49d191536c529eb4daada" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c6d1eeffa260fe9954e1141547a027db" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b9ca0a369aa431ce0a65530c40a05626" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "134a36d29214640e72f9a13cc6ec8cca" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "c92c54f71702a0b72649c56dfa24b7f8" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6b7fc1cebe5bcfc3aa75aedabd77fa87" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "043681bdda4f0add1420017e70775130" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f7fda193b36467e4db0c6c350d3567ab" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "764293bb9836a36a78fcdcdd2360c182" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9bdb4ce4da4990c39a75a0859db50422" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2b5e668e2483da973658e1f6825b5e24" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "cd466f96c7218532453d2bd7cb858601" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "385a10aa7fc1299042dc595b0cd45822" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f83811e3b14240c96396f93f8e0f71fc" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "24ac88bbe9cde1c81ca40b56f755c520" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ce05beb8ed3621d5926ec7eb390b4a32" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "a4a5bafd04b820acaea85622df7eea9c" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ea5142d9fb8cc04bbaa1b52b460d3760" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4ed85ede02e9f762e81928ee37cbbbb2" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "acb150cfac458cb0231e4e6ff6374f68" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "948f21c9c11d6e7e318d1ddd6a88a888" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0cc26d01cf8038a0c8fda9b07466d00a" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "8e3eeeda897f91d872c0135fa795d486" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "31dd0539d77366b919de0a83bcedb208" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7207c9a80153e8a414de4791e14d37a1" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "29c28c5e5592bbeab0b5771e4b89a7a8" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "98e1ad7adfe17e3933c6140b8a691d3d" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9855822596388df4d9a65a15a043def3" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e1326bc8feb079cc4917f6ed8242c69a" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "53926a59669164f3ca4a741ddce45dae" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "28f59e7bb6154b4f51c05eabdbbbd35d" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4b4f7c76fee11a5ade70b34cb637863f" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f031ecf7e35e7205df4277d72ed412d8" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d76ef12a0466e2b106d36adb8686cf78" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "8c4ed2ee5706473ba9514714f643319d" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "42b22a209e1229689afe67a7f2fab003" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5565481416a177cc5f4a0d342793a819" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2f3b7d847917d237dcbf08cca41ac484" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "101b9e315615f72ab1315503daa1e35b" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "02a838342fa0c203e785cf13684eb1a2" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "89c48505df8465934ed42fcada742786" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "58481fa2c1f1688a2d7a153164a25029" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d266d0e4dee1ff1979ff69235a13020d" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c9fb5aaa78b33df3af6de31cdec31c2c" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0832d589362b0571503d17645322078b" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "309a176aa1ddb3c8f950cafa1d266a43" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "850b8d02fed6075ada689a4543f8bab1" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0454d9a73e5bdc775b1187e8e536099a" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "29e0f69e3d8d40a85e12d8e63f05c0f5" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bbaedf5b433c6d9ea58c95dc5482a170" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1f288f0f3abc693a78fe1faf1330b621" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "73bfa0bc135fcf49063335c2c9c5a6c4" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "3d01d6735825b62185d1a77bea8dcbdd" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "368eb65edd4a4ee46a64db3f2cff382c" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2dccc5ffffd2b4721105b90d188cd450" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c60b6c979df27243a1acdc98545358ec" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "15ffe8b06dfa1aadb44e52ab2c5c7aac" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2a63d09159a9e707664da2aa52058ae6" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "085f19b66e162731080d43d80556c569" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "04b68a2d479a47395dde5c05ef45eda9" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "31dbf5e268b4793854003edb58106194" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "66361b0dc60f2d361e58d15a4334d8b6" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "db7aa2593893acb00a944ef6e09b210e" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "eb2e81567af62c2d7f4149d3e7256adc" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "f5b6c270f5c63f72029829143cb28c50" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1c7f82bbc57731e5c15226753f0fa637" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9966f3b75fa42edee7073338c4bcc628" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "14b5fd7a9d55f0d5dc9d9fdce1b80201" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5f3a0ba0594e61b2c421c4a170fc10f8" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4bd8bc659da7e869efc2ce7384ecd6bd" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "3567c31ed98f662c638b1e3eed62be0f" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5140c9106d148c890e7c34ccac40c7fa" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "000bd209222af59d9c8e202906007c68" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ee06afe3346e36267ee8fefdacf30e0c" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9ab5ec9917d2e8ceee22370728619d79" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d926b61b9158636c09d90fef9a815ec6" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fb49791ca2183b205c23ba4b9c59040b" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "06f157d7449172901f4d701f018e9b0b" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b95f88269a357944303b6cb3fb6096ff" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "d1356929155b5bef76b5a6e4baefee4e" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cc205b0cafbef313a65bc5bab4ff346b" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1191f9443dd0a5dff6c01ecbae9f4703" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "bf5205177df616ef0cb2946e73e2f71a" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7e38c756fee37fe3a4ac9aee61fd8a6b" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "83df5f13d1cbd79728e6885a1f40db1b" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2ebb5e4a7788a02a2a92616fe4e9ab74" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a6c18542aec7da0eae0919569b955f4a" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c88bbfde1e63e24aa8b20bd9079f9b3c" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "316be3a5badbec4f580eae5a7b03ab94" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fb7841f4948cd5487786654d7a5c5206" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "447999bbc2b1cf8a850e2126a3d4af13" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f2dbb64a67bb61abee9b43b82332ab12" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6dd5728bd45afd65a9297d305714d313" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cb089b143e7ade9baca4c1232df27495" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "33b6535834a814556acee94a408e6a1b" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "516a6f4e10fafda4f8d13e19c364f273" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9883146c75ce8daf5607eb0c885a3d79" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "592290f2e1a4d9fe66d7d366df8785a9" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6c0af2918b7337691ee2981ca59dc7e8" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "018a032a0a1717842d2cfde43b59ebce" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "83ed7d909896a19fb63ffdc77500b066" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "adc2c83813acb8eaad27e5553cea31f0" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "36b8498213e341cdb4cf3ad31ef9206b" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e5d70f2389386d378ef6f040797be8e6" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4b709a563cfcf58eec674a4fe27ac3dc" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ce640e5241778a824b999911fe772eb8" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "466dbc8ea50f6433003c5f5f05101074" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "67b3529a88baff68776b19336d81d63e" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8765e23c64cecbb153342b6cac43a418" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "53cae3c2b040ed0ac13ab07c6c4fcb20" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "02feb637af307477cfa20056c660c526" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "18f44fb008f755ac90b4f659fe15ffdc" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c7b3483d728a46c78f61674463644d11" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "694211181241c3825fff48fbabf24ce6" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c081db3ad22dc9ebb18237dfad2447ce" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "afe0752e38a85d2b85892921dbe74b1c" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "162e42c8a0bca95c2b41947113a2baff" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bc41387f0079fa82ea9fd69ab8ddfb59" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "1d68fbf942f922787d47ea06e9f9d95a" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "86b36f156911577f44251a69eb008a60" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "698949efa7e9cee28cb1ddb5cae45fb3" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8eb0b467a55fb10a12f742cdaf61af61" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b03f77d54336293fb04df2b88a9a0c76" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "50516a337cbdf51dd6f1091df84d6410" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "189f8a79b846a44f313b4fde7a1ca184" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ec854115575d0fed9971cf6c03bab6ac" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "eaa86246e96001bbc74f619dc6dcb5e2" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2cf226b03111964c57e9eece16ca6b1f" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e3beb63cb1f85fd6b12f258858fb207" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "08925c49bc61d66f7724e085a735af2e" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "0a83b121e80d88bbf9d36672dbe41e7d" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a17c56480f9bdb1c55992f49c22c2b59" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f092a859366a3a8c1597ef00ade00d01" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ca75904f7a3b8a047da99d961596761a" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ae01b3c5ad4f0ee5c35029f65756ed29" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "961ca7a1bbe7e2ff3c44fec910c926ee" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "168c07b256b55d730bebc486a73dcd69" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "efbff746a19f89234d8fbcaabac01896" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "608ef1382a555a3b5e79c3d731eb7bea" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "04a5e10929172151f8e260b2a141a118" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "56f3b4d61c21c42acc29fca1c0f62dde" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "50254992b45a2b22485d2dc6c9b20aa2" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "812deb20c9f2cabf18dd7f97407d6ca1" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b91953e5836fe64bbaa813e6d4c009f0" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "95873a46b736f63ea4c5f565eb0da8d5" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "a457940bbda1b88dd92c9b30d3f6e22a" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "554dacf940753680783160f066d9aa7e" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "01831c22db09104aad27e60dd926d292" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "df4125a38d6968ac72e491a4a2f10ba6" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dcae2c94378505e77895b7c17724ab7f" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6fd99fa54d2cea767ad57a28536bdccf" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "76b192d2d72a4ed6d454de358baf8234" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a8fdf2adb9384d796d6604c7ee5ade45" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "debf28c09da7000981f3bfc310f2d473" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "a54dde544c3a2d3b1f28d99826aa418b" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "484f0f38f8b1194b5e9f2b0ba0f94038" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "17dad9be3f2c6cbb0db1f657f6eb3b57" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "efe904ce28a3b5b12afd1fd813d1877c" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c7ba6b34343cb5632154f17cb7a218ff" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9d8f090da6c921e18533cc375242207a" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f09a3f8cee31e17d02010be8cd4f3849" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bd5cbab383b0ceeab722662d01577b6c" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "969db42a3e0755b7d42e60d52001c18c" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "f1b116559e60cd5bff2807f392b6a912" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "98afe8b007124e21e8b6c0ef80f3c24e" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "83a841e29a1ce50ebf081b3a99bf1302" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0b1d3ea72525a92c09ebe84c1e51d96f" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "22c9a70e1ce5d0fb9b71778f8e35fd5f" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b6f7ca4dce0430ca62695ddd664c6c20" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 23117824, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18907136 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 } ], "md5sum": "c89f796b2dd7822f5c0a364e0c0b02b8" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e61e1fa342f11168a629cf8b21fafce5" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c6b06a413a576f8688acbd58742ec4ea" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8c4828635f8fff8da76faaee9b8cb2dc" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2b8f82ac8bdd20c3584505ec7ac8feb6" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4835fe91663a351bf7dd8e13b86d411e" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "c430b93a5c318c9cee5f91cc1469bfd4" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "aa33c89c3fee60f9d181bd4338676471" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "83c72d968f67b9a330c250733931a7f4" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2dc7569ea9c8818a8583a70b99a538e3" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2dae8be84f966f22aad78cb066f98e85" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "50a91865d64d8f0cad95cecba3d8527d" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "21ebb3b9e20fe7e55966eb0c86ceed68" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "12ea96b1d3667ca9b75dc527a3de4aa4" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0499761dbd897f8c0cc5cdc9a6f4824a" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0fc3a12668d4e76e723db7f61a338a72" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "54a110ca119fb7610e08b9cb8f444763" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 29409280, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 14712832 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29392896 } ], "md5sum": "54274ed06993e958b1967529d63329ed" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9934075c0af73b849da2f3f8892d9f92" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "63ff7f24eeba87901ec56d2e1cf1a176" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1166c75c08b3756c9adbfd69bf8d4f46" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "231dc21f699853e1ec8bf9f6bca2d4ae" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8129d78267b7212040fd49f46122c0d2" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e2bfe14d3e76c907b04cb2fe2e491d91" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "e133e58c127a4c99fec1cf6bf12150bb" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a6fa55e4eca26bf4ddd75fa0bb853550" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ccb22e4d8aa0b34cb3908a4796463797" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a2cec1f366f34b9fdfa4666c30432c60" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f20be9945f9763b2fec92220bee955bc" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6b79d197ef0180f1235e8805862c5b9c" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1249a5830796f7b31ec0ebdb2d8a7db9" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 9437184 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13631488 }, { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 13647872 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28327936 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "484d427b36724a6f82df6524c44cf9a5" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "af6ff1d69b438eba012ca59ce0c1f021" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "654a9a6fb73b94626f474d5cbb2c2555" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4b4f82d74692e45d95eb59810af31180" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cce0e3d4ea758f2fc5c45cffd07de1bb" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1ac02f4044fe0133e9e2f6b462b72df7" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "19a036526ca316a42d4b8f957af6e16c" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "13e522b42f764093118432ef352da073" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "565c26536f3f98bfa89a8410cb17f8d1" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "236619eec116391932a771794c35785f" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bbcdb37195f0bed778681a2310885101" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b9d79a1c69c0b9c65a0390d8a6a76ed7" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b7f7b9ca51c8970e29894354e332e325" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "fa173ee368f69af7f031ff08b552e80a" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d0c1d72898dc2fa2569347d356518bc3" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "02885510715f3914f1af4c0c73e8e049" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c2f7fe468adefa5d0184b8be7d836cea" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "63de4ec14b20e09a6f6d7f6ec6873445" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "8e95ff044ea3d86ebb5def26a5272e89" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "aa4c688c780d8f9e66ce6d58dfac03ea" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dfe0e081c74ed3f41de287f6d19c5728" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fd9ccbd9d781e02a6b54766f27609e62" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7dbd0edfc4f43afefd503fd118818198" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "696cef7b589f598d28ec2a1fcf4d2e3b" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "cd1d9648b34c2d391224e26cef9cd3ca" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3d02efd78f15d947b67295fc9ade1e69" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a5e98f51a031157d3743780f4cbd79c1" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ba33f8db8de7efd1f61d8f41ac50fbbb" }, { "dataPath": "params_shard_480.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "65adecdd6e68e70d2358902ad1338415" }, { "dataPath": "params_shard_481.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "62ba50bcb85b7b6321e21dc25356efbc" } ] }