riczhou's picture
Upload folder using huggingface_hub
a1b6c51 verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 5019811840.0,
"BitsPerParam": 5.000895173865207
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 262668288,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
128256,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262668288,
"byteOffset": 0
}
],
"md5sum": "9f96c20d2554e4ba229ba164c1211aeb"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 32833536,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
128256,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 32833536,
"byteOffset": 0
}
],
"md5sum": "69f46f69fc5a1d6bd107e962dae64289"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0d4531d1bd6f04887850c7b00e093a1c"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "304a5294ef017c5d0abf7e762210d820"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "fab4a83986e8c51fb2b21713d6be5c79"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7a71915fa34aa7a8fd524d1c01144267"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "f586dc7ee826595ea0af0503912f1ca8"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "87241662e301171c5bc5c084f0db2232"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "cd98bfaee293c0a7ec27e29ecf74fe19"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7ff50bd19a05891353e556b4af8079e6"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d8326fbdf26aacad5356f443e65898cf"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "21808d5b2929ec6b4875f5cfcbdc8ee3"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "79e7045f1944857762fbb79760eb72ec"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6738591820815025408383ee576a3108"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "0957884d4c1905c7e75d04e0c94405d2"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f13f2a3ee5ea9e1a45ea469917c078bc"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c1e66dcb7615cf4b0b6679ed7c71bd48"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "f42a379a542c99d7368894d36d892344"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "16144779c8b249ad7dd89160088fbd24"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "d191e4f08ad9b594fb0229bf5993b60c"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d2eb54f7dffb7af2556ad389c7b0fd01"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "29f2778dc9640a6b2669e103b8dd0de2"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "00e06da47564f647d939d466abdb0015"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8290efc16f895efcab5f4aa89603c04e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "79850a2b2f87c9da632b950e59d7f6a7"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d3059396e8ddf24ffa2e0a8a7e53fc91"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "777b70f5f1d292646fd67bb5b2d50367"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "3e34dcf17ddd3252c68ea50ed131756f"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e5356e61a8aa702f978772b873b0b501"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e513547d5b22dd1d7d0fef601f50e2c6"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "c7376187c659a116df53bba509c1726d"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "74c6bc06540bbcdfc60bfe76065b1b32"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "fa36be01dace9172c48667a30a9f7c30"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7bbe1caa655dc241881ef57a21042270"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "48718e227709df9044a5c5c17cc003c6"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "107db8f959bb6bb2b481f97b40eac7f0"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0c432047a0d4d4c1057ea936fadc283a"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "07ad9aeeb6a7a314c566821f2f7b6ef8"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7f83e96749b388baa3f6acd956f728fe"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "275855aee6d3b01a0d90f0f79d2e87a5"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "319ca6e489e8d4207a7a425b27cf44ee"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6d01c0192a105fb50773aff5aa59af90"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "764cfc6ae4a9f9ef7cc2eb056c70507e"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "6940f0fa7be6dc1582c6a9057a6840da"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "60fa2b928de8abde7ea94dfa79b6e168"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "ee137c21acd19a8e0f856e8ed119b4f9"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ceef547a4ad5b2e64d271184aa53f863"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "fb804a87949c63bbaa75b79f0dff6089"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4bfde866d98250a820c3d29ba1b7a949"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "77db8c4084f3971de7a314d483f19bf6"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "23bb870437a82d2d523d0665981f9f06"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "11b517be05d9f566a657b266eb5f7f35"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "fb9d048837b6265faed9e386283ac65c"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "0a0966a35bec3cc0df22e6cce02b0129"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0a0229b23e5326b78e2f3a8107aca17e"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8a52c7f6e2c5afd86e5a428493b64002"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "f59ae2c230cca7d28dd804905a6d48a3"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d2fa337e3f854026dc4613576f17783c"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "ee740e10e46b38b66f815c7189406191"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ef90de237d9de8a66c2c16582dd0a35b"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "7afa0e7256446c2b0dc0bacb803c8bab"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "80ac47d3bcf29f02084914a8b3f5c26d"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d9e18d71474e8f0e6d7c891229cbc733"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "91d052f0588de0b897e9f37a33153709"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "47d38ce51b4880a9b93fb41708c29141"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "241da0f1616fdd89c0905b3d80fb8f77"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "4367aa0074b8b2b0fe6b79606900c26e"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1b69289aa4fc4b87df89ce5a687177b5"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "10250c86fb54c76b559eafb1ce6d967b"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "379493a0a127b8b6787b3797b3bd4a46"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8acd7ec32bf985603d360e51b357ce51"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "e9f7dcc943d8feaa539665f0a16d8fdc"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ac7c24b31e8313525c11aa12c179aaa1"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "cfbae9a3aee053e814d1146a560089b6"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f8ad7b4090b0b6fc3e1d8b1ac8948f3f"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "bead9dd9c51c191f2b508216adb35d65"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "eb8913223ccbcd50000836327743984f"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "efa7fa8f7236af7ea36a36209fc69406"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "bfb3997cf87a5330a0361597b62ce4ad"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "c786f0493fa5a83eacf8d5e3ac8ceb84"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6bbdb94d2855b1c7e756b20347179a13"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "55dc7141f126986e0af9f4107787703b"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "163cd27970e4e9ff7ee1fd2a2b49b945"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0b28de6d42bbf3fe3a716cd140e31cc4"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "986e9e3069ea8e066726e7ebd14b3057"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d605979dfe987bcd3724e7a267562f1b"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "2054ea1d231bd9c9ef99ae10237f560a"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1d0950556e804d3d16fbf709e4f91493"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "de0a43661dec2c774a3d3927bb2012d7"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "6ff0b8fd9071627d429831407ce03870"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2de59e4d2c5eae6a9a1da7b71f19f70d"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c752961312f43318f5225cd4dfae506c"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "efdc6c6a0c5f72f86f1887f4ae2b53b0"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "13fcb015c3d19bfccd3a1f331ce9c796"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5d4062ef2bf0a081b4898a49bf7673b5"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "a453b411a45ea8fea4f4010023dac640"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "442deb89b0d66a03b7fdc4fb3b303e43"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "97c96230f38610c413760233a1ed4631"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a91f7875209e0f8342458ed89b7fd8ca"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "796d6b7dbd49d0f301ccedcfa70f9fe1"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "45587cb3e71eea08d603afa8caf345dc"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6c60becfafefa6d33007812dbe98e591"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "32381eb189bd0da0dbb6fc1c6de54c2d"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ed550baeee558b86b9cfc4bc75598350"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "fdf16829f3ff27ff61b72b6d544db588"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 262668288,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
128256,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262668288,
"byteOffset": 0
}
],
"md5sum": "d4c537313390a8329803423f36183ecb"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 32833536,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
128256,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 32833536,
"byteOffset": 0
}
],
"md5sum": "98f616a4059d040de6526c0b8b561c37"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 22044672,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22036480
}
],
"md5sum": "46872649f8c4ff82c553527557428a75"
}
]
}