{ "metadata": { "ParamSize": 243, "ParamBytes": 3770683488.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 388903680, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 122760, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 388903680, "byteOffset": 0 } ], "md5sum": "5424902e966672e163b13a12becc21b7" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "da075eae455338050a9cc0919cc47190" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "8f2d8f38944fea709da9bdf4f9a19e96" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.0.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "2ebe7c77dd853a0fbe190d6d1777f2ee" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "8d402ba3ecad6fad91d07bd1f6903e96" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "de8846ca164a4c36e72d6643f79e6841" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.1.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "ff6adf1dbb95217c2edf2f6e0783dfbf" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "8d88972ad3e6f0cafe915db747d89974" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "f87273671993b30f055a0efa88011259" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.2.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "97850164831e0d0ca7389c9264ae9566" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "a3272de3d0f8bc568967b3055e31d496" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "f5d095d76da498007ec39485d0a2cdd0" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.3.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "d30524dd425ae1ecb8a90ca00fb007b4" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "b53093ecba11550816894407b5f23f89" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "f465f88756a59532b03fcae03eced213" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.4.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "4ccb8538c5fd88d55948af927a1ac1bb" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "fd4e7f3ebb5752fb37480b93a304b416" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "0784a80db75dd4812a3962cb79d52e43" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.5.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "5b6b809d5cd211cab6906c3a2b35d72d" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "6d6e025a7c9a2901c9cef1fce10a15ab" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "03fc310850a81b86fc48a178dedb746c" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.6.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "2ca04727ca7180ad82e8678d05ab547d" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "b5271f3eaa62703836ed364db3a331a9" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "66be5651d933ee0242277c65904b1ad4" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.7.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "f64e50b000480274c4115e60c46f2596" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "d5a0a3ce5c83093a3c821a2f216875b3" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "16624c0621ab5e7459e2eeac9f54faa0" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.8.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "ffa6d036b48233dc14aeab387c8cddc9" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "f2def704d3e32216992e8738b69bc5f6" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "e7c2b7a52f372dda4c281aa93905e00c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.9.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "e15a4fd16536de3e831d63c798c2fe0e" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "7e407ff48896e87fc91141de35d583a4" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "23bf631f23071358b7efc7cd7317f7d3" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.10.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "88f7e594613118161dcd21fcb9cd960c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "d6bb940e0a1375e667eb156e4161ddd2" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "d8227fa1b74bf2898dbfeb0baa27a667" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.11.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "d68d0448f94ddb73a59cbd5bd79ac7c9" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "bcf3b8db66e58d54c96520a665f8065b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "e29882cc3f9cda013f07bcb1356a80fe" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.12.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "a3b16776dad1b22b54ee337999435657" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "b88d51acd777bc2e55bd10f959f8a93f" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "3500f23cf748b1a56d49b4ff49f0365f" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.13.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "33abe5b62d470739b5ed78283a044f83" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "2765fecea71b0360b175897dd47923ee" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "4494fefbcd175f2519bbdcdacdda310a" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.14.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "4a178e7a287c0c76e6a380ec56cbe9a7" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "55245b4674b869c2f9185c8eb0286588" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "36ecfe029069b47fd6f1134708158581" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.15.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "d8e911df951bdd5a082e47a23edb5087" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "2f96f096f35797dcd331a1b7e7dc670c" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "c0ed78fb5cefd70b4b751e5356b17799" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.16.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "4d36c70f5b4e40ccd2d347efe2c51111" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "8e8824562a1c993b727a19974f488505" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "a1eed9fbece3b48506a837657474ffa8" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.17.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "62874b81026d07a4bd8e80d29f15a81a" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "ac9872ea8f33b7151a10c53b86e5d073" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "a456578bca4c54691ceb8865a389f7a8" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.18.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "f63ec061066efb418f5e2651ea149fdb" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "65025ea5d3cea6eb028d6f8c46fc62e0" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "4abdbb11d23e66ad3ecf990a6bb363d1" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.19.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "a20cdc6607f735c07473849b99dba928" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "511c103b536838c6c7ddd25e2f7d1425" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "ac823cd64add465e41a6be2f1ca26db4" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.20.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "5361875704d7d8a78f23a76af6c1f5e3" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "41203a108cdb61ac99dc3de98e6d2d5b" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "b867c3b5a479bc228c780d9b34470112" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.21.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "b8af68a9699a8b5d2bad49d45dd9cabe" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "0e6f76f83caa0af084aef01806dd672c" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "a1427cdf78c61bbdd282221d50e4672b" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.22.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "20350533c7620c96d9adf2673de0426d" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "a49da40e825d8404eacfb4034fbbd8c4" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "f1cb255ccccfe1d2e1a330ed5979e254" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.23.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "f2bf016580101897aecb7bd7bf1c0a63" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "439aa06815f2fbab2111440df9560af0" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "acbf2065d4bc07dae667ae43126557e1" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.24.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "32a264c52765fcea760d61f7e1f5da6b" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "4339909c51f39e1f5407d780cdaae24b" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "b44044761dd6c0c8099c3480bcbc5b57" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.25.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "6367bb8bb960915da18cb3f652319405" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "669fe8700061ee2561e10ee370ae0c8a" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "3b495124a1f55dbef1ad35083d305b50" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.26.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "b4daf51f329762077bceb0bf6a6b0e02" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "6b7c25df1f319b322323a8568805ce76" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "95ddefef00287ebdfd4d04a270baa67d" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.27.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "e746879f720ac3cc5202f9d506209f58" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "c3fcdf813e3e531be209ea664b45451f" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "70090e4949c2df99d04a673dda5088c7" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.28.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "87281f021ad408df423e73ca37da8d4e" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "792ea2cc17729948fbfb1f5efa4f8283" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "02b09f228c45c9516e116aa894bbe547" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.29.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "01abcb1cadc96a2680e7d46329fcea4c" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "c91c09a0bfe183a5782b5d743e93ec70" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "b87d1fd463ac311a58631b42c1411f34" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.30.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "20717b5ab570bb4a2da3bec116c0e724" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "4eb60e8ae2648f3917a2731487dbcf85" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "cdaea89da476e9e7e5cf23670cc1e77f" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.31.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "894e6517fc687d6b12f7ee6f3e37f17b" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "5b0274f2671acde02e7e149324587164" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "cbf3f29332c57967f07d73db12a3cdf7" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.32.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "512df7dc1b276b61f37ef3bac8280d7d" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "719c0fc160601c28d6c76450a2128138" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "bd15501cda643976f06172001e960c39" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.33.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "be7acf9053baf8c49e56c8e55e1c1738" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "153a3facce91de97933288c2680c2142" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "482b1807b922f621fd61b66aacc1a7b3" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.34.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "341b2d4a6d2e1881ab41809feef8b216" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "c9ae89585b805e137898ef5253ee89ba" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "b2cbc54d8c57c040c213edfbeabe39a9" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.35.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "8d0aba256987045e77c7cdd8acac137f" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "448744ffb049f3450092df4b97c261e1" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.36.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "7d5e853471f03df39ac9510d02fb6fc9" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.36.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "755ea5a2e83ea85e906fe0d2ae77369b" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "74498e95bdb634b758f6a51e1be48c6d" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.37.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "cd8d7efa88c1534c956eb5048aa96416" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.37.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "9160ad34620d2c1a21c3509a9b33492e" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "31276f010e512251265db1ecc5314be4" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.38.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "3bfd0dac94c404568339328931dda60b" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 20078784, "records": [ { "name": "model.layers.38.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 } ], "md5sum": "019691d999c4c68a6e9f5758ebd3a6d5" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 36495360, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.weight", "shape": [ 11520, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36495360, "byteOffset": 0 } ], "md5sum": "46ce83f72fb4071cb54c7649bd6b9cc5" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 18247680, "records": [ { "name": "model.layers.39.mlp.down_proj.weight", "shape": [ 1584, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18247680, "byteOffset": 0 } ], "md5sum": "feebc8623ab88876de3eb2366dcfce7d" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 388903680, "records": [ { "name": "lm_head.weight", "shape": [ 122760, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 388903680, "byteOffset": 0 } ], "md5sum": "02a95b3956cdba027d5589b5d9a8114b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 20081952, "records": [ { "name": "model.layers.39.self_attn.wqkv_pack.weight", "shape": [ 4752, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15054336, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.weight", "shape": [ 1584, 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5018112, "byteOffset": 15054336 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20072448 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20075616 }, { "name": "model.norm.weight", "shape": [ 1584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3168, "byteOffset": 20078784 } ], "md5sum": "8c76431e7a928bfa50d6f30a2287b4ca" } ] }