diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..56f8f607add57bb7f9c2a5cb5c57865e1fcf2b31
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,9 @@
+{
+  "<|im_end|>": 122753,
+  "<|im_start|>": 122757,
+  "<|tool_call|>": 122756,
+  "▁": 122758,
+  "▁": 122755,
+  "▁\n": 122754,
+  "▁": 122759
+}
diff --git a/mlc-chat-config.json b/mlc-chat-config.json
new file mode 100644
index 0000000000000000000000000000000000000000..87c41e2aef883ef4f05fbe0803021672d9bfe7bc
--- /dev/null
+++ b/mlc-chat-config.json
@@ -0,0 +1,86 @@
+{
+  "version": "0.1.0",
+  "model_type": "minicpm",
+  "quantization": "q0f16",
+  "model_config": {
+    "vocab_size": 122760,
+    "hidden_size": 1584,
+    "num_hidden_layers": 40,
+    "num_attention_heads": 36,
+    "num_key_value_heads": 36,
+    "hidden_act": "silu",
+    "rms_norm_eps": 1e-05,
+    "intermediate_size": 5760,
+    "scale_emb": 12,
+    "scale_depth": 1.4,
+    "dim_model_base": 256,
+    "use_cache": true,
+    "bos_token_id": 1,
+    "eos_token_id": 2,
+    "tie_word_embeddings": false,
+    "rope_theta": 1000000.0,
+    "context_window_size": 65536,
+    "prefill_chunk_size": 128,
+    "tensor_parallel_shards": 1,
+    "head_dim": 44,
+    "max_batch_size": 128,
+    "num_experts_per_tok": 0,
+    "num_experts": 0
+  },
+  "vocab_size": 122760,
+  "context_window_size": 65536,
+  "sliding_window_size": -1,
+  "prefill_chunk_size": 128,
+  "attention_sink_size": -1,
+  "tensor_parallel_shards": 1,
+  "pipeline_parallel_stages": 1,
+  "temperature": 1.0,
+  "presence_penalty": 0.0,
+  "frequency_penalty": 0.0,
+  "repetition_penalty": 1.0,
+  "top_p": 1.0,
+  "tokenizer_files": [
+    "tokenizer.model",
+    "tokenizer.json",
+    "added_tokens.json",
+    "tokenizer_config.json"
+  ],
+  "tokenizer_info": {
+    "token_postproc_method": "byte_fallback",
+    "prepend_space_in_encode": true,
+    "strip_space_in_decode": true
+  },
+  "conv_template": {
+    "name": "chatml",
+    "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
+    "system_message": "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.",
+    "system_prefix_token_ids": null,
+    "add_role_after_system_message": true,
+    "roles": {
+      "user": "<|im_start|>user",
+      "assistant": "<|im_start|>assistant"
+    },
+    "role_templates": {
+      "user": "{user_message}",
+      "assistant": "{assistant_message}",
+      "tool": "{tool_message}"
+    },
+    "messages": [],
+    "seps": [
+      "<|im_end|>\n"
+    ],
+    "role_content_sep": "\n",
+    "role_empty_sep": "\n",
+    "stop_str": [
+      "<|im_end|>"
+    ],
+    "stop_token_ids": [
+      2
+    ],
+    "function_string": "",
+    "use_function_calling": false
+  },
+  "pad_token_id": 0,
+  "bos_token_id": 1,
+  "eos_token_id": 2
+}
\ No newline at end of file
diff --git a/ndarray-cache.json b/ndarray-cache.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c484b74d252e5dcb5174c0c36444b6980dd7eaf
--- /dev/null
+++ b/ndarray-cache.json
@@ -0,0 +1,3577 @@
+{
+    "metadata": {
+        "ParamSize": 243,
+        "ParamBytes": 3770683488.0,
+        "BitsPerParam": 16.0
+    },
+    "records": [
+        {
+            "dataPath": "params_shard_0.bin",
+            "format": "raw-shard",
+            "nbytes": 388903680,
+            "records": [
+                {
+                    "name": "model.embed_tokens.weight",
+                    "shape": [
+                        122760,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 388903680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "5424902e966672e163b13a12becc21b7"
+        },
+        {
+            "dataPath": "params_shard_1.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.0.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "da075eae455338050a9cc0919cc47190"
+        },
+        {
+            "dataPath": "params_shard_2.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.0.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "8f2d8f38944fea709da9bdf4f9a19e96"
+        },
+        {
+            "dataPath": "params_shard_3.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.0.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.0.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.0.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.0.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "2ebe7c77dd853a0fbe190d6d1777f2ee"
+        },
+        {
+            "dataPath": "params_shard_4.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.1.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "8d402ba3ecad6fad91d07bd1f6903e96"
+        },
+        {
+            "dataPath": "params_shard_5.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.1.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "de8846ca164a4c36e72d6643f79e6841"
+        },
+        {
+            "dataPath": "params_shard_6.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.1.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.1.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.1.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.1.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "ff6adf1dbb95217c2edf2f6e0783dfbf"
+        },
+        {
+            "dataPath": "params_shard_7.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.2.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "8d88972ad3e6f0cafe915db747d89974"
+        },
+        {
+            "dataPath": "params_shard_8.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.2.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "f87273671993b30f055a0efa88011259"
+        },
+        {
+            "dataPath": "params_shard_9.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.2.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.2.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.2.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.2.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "97850164831e0d0ca7389c9264ae9566"
+        },
+        {
+            "dataPath": "params_shard_10.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.3.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "a3272de3d0f8bc568967b3055e31d496"
+        },
+        {
+            "dataPath": "params_shard_11.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.3.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "f5d095d76da498007ec39485d0a2cdd0"
+        },
+        {
+            "dataPath": "params_shard_12.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.3.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.3.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.3.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.3.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "d30524dd425ae1ecb8a90ca00fb007b4"
+        },
+        {
+            "dataPath": "params_shard_13.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.4.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "b53093ecba11550816894407b5f23f89"
+        },
+        {
+            "dataPath": "params_shard_14.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.4.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "f465f88756a59532b03fcae03eced213"
+        },
+        {
+            "dataPath": "params_shard_15.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.4.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.4.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.4.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.4.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "4ccb8538c5fd88d55948af927a1ac1bb"
+        },
+        {
+            "dataPath": "params_shard_16.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.5.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "fd4e7f3ebb5752fb37480b93a304b416"
+        },
+        {
+            "dataPath": "params_shard_17.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.5.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "0784a80db75dd4812a3962cb79d52e43"
+        },
+        {
+            "dataPath": "params_shard_18.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.5.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.5.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.5.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.5.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "5b6b809d5cd211cab6906c3a2b35d72d"
+        },
+        {
+            "dataPath": "params_shard_19.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.6.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "6d6e025a7c9a2901c9cef1fce10a15ab"
+        },
+        {
+            "dataPath": "params_shard_20.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.6.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "03fc310850a81b86fc48a178dedb746c"
+        },
+        {
+            "dataPath": "params_shard_21.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.6.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.6.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.6.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.6.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "2ca04727ca7180ad82e8678d05ab547d"
+        },
+        {
+            "dataPath": "params_shard_22.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.7.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "b5271f3eaa62703836ed364db3a331a9"
+        },
+        {
+            "dataPath": "params_shard_23.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.7.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "66be5651d933ee0242277c65904b1ad4"
+        },
+        {
+            "dataPath": "params_shard_24.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.7.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.7.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.7.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.7.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "f64e50b000480274c4115e60c46f2596"
+        },
+        {
+            "dataPath": "params_shard_25.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.8.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "d5a0a3ce5c83093a3c821a2f216875b3"
+        },
+        {
+            "dataPath": "params_shard_26.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.8.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "16624c0621ab5e7459e2eeac9f54faa0"
+        },
+        {
+            "dataPath": "params_shard_27.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.8.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.8.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.8.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.8.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "ffa6d036b48233dc14aeab387c8cddc9"
+        },
+        {
+            "dataPath": "params_shard_28.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.9.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "f2def704d3e32216992e8738b69bc5f6"
+        },
+        {
+            "dataPath": "params_shard_29.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.9.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "e7c2b7a52f372dda4c281aa93905e00c"
+        },
+        {
+            "dataPath": "params_shard_30.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.9.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.9.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.9.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.9.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "e15a4fd16536de3e831d63c798c2fe0e"
+        },
+        {
+            "dataPath": "params_shard_31.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.10.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "7e407ff48896e87fc91141de35d583a4"
+        },
+        {
+            "dataPath": "params_shard_32.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.10.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "23bf631f23071358b7efc7cd7317f7d3"
+        },
+        {
+            "dataPath": "params_shard_33.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.10.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.10.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.10.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.10.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "88f7e594613118161dcd21fcb9cd960c"
+        },
+        {
+            "dataPath": "params_shard_34.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.11.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "d6bb940e0a1375e667eb156e4161ddd2"
+        },
+        {
+            "dataPath": "params_shard_35.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.11.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "d8227fa1b74bf2898dbfeb0baa27a667"
+        },
+        {
+            "dataPath": "params_shard_36.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.11.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.11.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.11.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.11.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "d68d0448f94ddb73a59cbd5bd79ac7c9"
+        },
+        {
+            "dataPath": "params_shard_37.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.12.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "bcf3b8db66e58d54c96520a665f8065b"
+        },
+        {
+            "dataPath": "params_shard_38.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.12.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "e29882cc3f9cda013f07bcb1356a80fe"
+        },
+        {
+            "dataPath": "params_shard_39.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.12.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.12.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.12.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.12.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "a3b16776dad1b22b54ee337999435657"
+        },
+        {
+            "dataPath": "params_shard_40.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.13.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "b88d51acd777bc2e55bd10f959f8a93f"
+        },
+        {
+            "dataPath": "params_shard_41.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.13.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "3500f23cf748b1a56d49b4ff49f0365f"
+        },
+        {
+            "dataPath": "params_shard_42.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.13.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.13.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.13.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.13.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "33abe5b62d470739b5ed78283a044f83"
+        },
+        {
+            "dataPath": "params_shard_43.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.14.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "2765fecea71b0360b175897dd47923ee"
+        },
+        {
+            "dataPath": "params_shard_44.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.14.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "4494fefbcd175f2519bbdcdacdda310a"
+        },
+        {
+            "dataPath": "params_shard_45.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.14.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.14.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.14.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.14.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "4a178e7a287c0c76e6a380ec56cbe9a7"
+        },
+        {
+            "dataPath": "params_shard_46.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.15.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "55245b4674b869c2f9185c8eb0286588"
+        },
+        {
+            "dataPath": "params_shard_47.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.15.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "36ecfe029069b47fd6f1134708158581"
+        },
+        {
+            "dataPath": "params_shard_48.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.15.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.15.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.15.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.15.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "d8e911df951bdd5a082e47a23edb5087"
+        },
+        {
+            "dataPath": "params_shard_49.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.16.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "2f96f096f35797dcd331a1b7e7dc670c"
+        },
+        {
+            "dataPath": "params_shard_50.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.16.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "c0ed78fb5cefd70b4b751e5356b17799"
+        },
+        {
+            "dataPath": "params_shard_51.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.16.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.16.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.16.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.16.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "4d36c70f5b4e40ccd2d347efe2c51111"
+        },
+        {
+            "dataPath": "params_shard_52.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.17.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "8e8824562a1c993b727a19974f488505"
+        },
+        {
+            "dataPath": "params_shard_53.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.17.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "a1eed9fbece3b48506a837657474ffa8"
+        },
+        {
+            "dataPath": "params_shard_54.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.17.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.17.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.17.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.17.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "62874b81026d07a4bd8e80d29f15a81a"
+        },
+        {
+            "dataPath": "params_shard_55.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.18.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "ac9872ea8f33b7151a10c53b86e5d073"
+        },
+        {
+            "dataPath": "params_shard_56.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.18.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "a456578bca4c54691ceb8865a389f7a8"
+        },
+        {
+            "dataPath": "params_shard_57.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.18.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.18.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.18.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.18.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "f63ec061066efb418f5e2651ea149fdb"
+        },
+        {
+            "dataPath": "params_shard_58.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.19.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "65025ea5d3cea6eb028d6f8c46fc62e0"
+        },
+        {
+            "dataPath": "params_shard_59.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.19.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "4abdbb11d23e66ad3ecf990a6bb363d1"
+        },
+        {
+            "dataPath": "params_shard_60.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.19.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.19.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.19.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.19.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "a20cdc6607f735c07473849b99dba928"
+        },
+        {
+            "dataPath": "params_shard_61.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.20.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "511c103b536838c6c7ddd25e2f7d1425"
+        },
+        {
+            "dataPath": "params_shard_62.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.20.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "ac823cd64add465e41a6be2f1ca26db4"
+        },
+        {
+            "dataPath": "params_shard_63.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.20.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.20.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.20.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.20.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "5361875704d7d8a78f23a76af6c1f5e3"
+        },
+        {
+            "dataPath": "params_shard_64.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.21.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "41203a108cdb61ac99dc3de98e6d2d5b"
+        },
+        {
+            "dataPath": "params_shard_65.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.21.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "b867c3b5a479bc228c780d9b34470112"
+        },
+        {
+            "dataPath": "params_shard_66.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.21.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.21.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.21.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.21.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "b8af68a9699a8b5d2bad49d45dd9cabe"
+        },
+        {
+            "dataPath": "params_shard_67.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.22.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "0e6f76f83caa0af084aef01806dd672c"
+        },
+        {
+            "dataPath": "params_shard_68.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.22.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "a1427cdf78c61bbdd282221d50e4672b"
+        },
+        {
+            "dataPath": "params_shard_69.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.22.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.22.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.22.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.22.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "20350533c7620c96d9adf2673de0426d"
+        },
+        {
+            "dataPath": "params_shard_70.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.23.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "a49da40e825d8404eacfb4034fbbd8c4"
+        },
+        {
+            "dataPath": "params_shard_71.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.23.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "f1cb255ccccfe1d2e1a330ed5979e254"
+        },
+        {
+            "dataPath": "params_shard_72.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.23.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.23.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.23.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.23.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "f2bf016580101897aecb7bd7bf1c0a63"
+        },
+        {
+            "dataPath": "params_shard_73.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.24.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "439aa06815f2fbab2111440df9560af0"
+        },
+        {
+            "dataPath": "params_shard_74.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.24.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "acbf2065d4bc07dae667ae43126557e1"
+        },
+        {
+            "dataPath": "params_shard_75.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.24.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.24.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.24.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.24.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "32a264c52765fcea760d61f7e1f5da6b"
+        },
+        {
+            "dataPath": "params_shard_76.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.25.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "4339909c51f39e1f5407d780cdaae24b"
+        },
+        {
+            "dataPath": "params_shard_77.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.25.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "b44044761dd6c0c8099c3480bcbc5b57"
+        },
+        {
+            "dataPath": "params_shard_78.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.25.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.25.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.25.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.25.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "6367bb8bb960915da18cb3f652319405"
+        },
+        {
+            "dataPath": "params_shard_79.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.26.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "669fe8700061ee2561e10ee370ae0c8a"
+        },
+        {
+            "dataPath": "params_shard_80.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.26.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "3b495124a1f55dbef1ad35083d305b50"
+        },
+        {
+            "dataPath": "params_shard_81.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.26.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.26.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.26.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.26.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "b4daf51f329762077bceb0bf6a6b0e02"
+        },
+        {
+            "dataPath": "params_shard_82.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.27.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "6b7c25df1f319b322323a8568805ce76"
+        },
+        {
+            "dataPath": "params_shard_83.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.27.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "95ddefef00287ebdfd4d04a270baa67d"
+        },
+        {
+            "dataPath": "params_shard_84.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.27.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.27.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.27.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.27.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "e746879f720ac3cc5202f9d506209f58"
+        },
+        {
+            "dataPath": "params_shard_85.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.28.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "c3fcdf813e3e531be209ea664b45451f"
+        },
+        {
+            "dataPath": "params_shard_86.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.28.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "70090e4949c2df99d04a673dda5088c7"
+        },
+        {
+            "dataPath": "params_shard_87.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.28.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.28.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.28.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.28.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "87281f021ad408df423e73ca37da8d4e"
+        },
+        {
+            "dataPath": "params_shard_88.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.29.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "792ea2cc17729948fbfb1f5efa4f8283"
+        },
+        {
+            "dataPath": "params_shard_89.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.29.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "02b09f228c45c9516e116aa894bbe547"
+        },
+        {
+            "dataPath": "params_shard_90.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.29.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.29.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.29.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.29.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "01abcb1cadc96a2680e7d46329fcea4c"
+        },
+        {
+            "dataPath": "params_shard_91.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.30.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "c91c09a0bfe183a5782b5d743e93ec70"
+        },
+        {
+            "dataPath": "params_shard_92.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.30.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "b87d1fd463ac311a58631b42c1411f34"
+        },
+        {
+            "dataPath": "params_shard_93.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.30.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.30.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.30.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.30.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "20717b5ab570bb4a2da3bec116c0e724"
+        },
+        {
+            "dataPath": "params_shard_94.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.31.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "4eb60e8ae2648f3917a2731487dbcf85"
+        },
+        {
+            "dataPath": "params_shard_95.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.31.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "cdaea89da476e9e7e5cf23670cc1e77f"
+        },
+        {
+            "dataPath": "params_shard_96.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.31.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.31.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.31.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.31.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "894e6517fc687d6b12f7ee6f3e37f17b"
+        },
+        {
+            "dataPath": "params_shard_97.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.32.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "5b0274f2671acde02e7e149324587164"
+        },
+        {
+            "dataPath": "params_shard_98.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.32.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "cbf3f29332c57967f07d73db12a3cdf7"
+        },
+        {
+            "dataPath": "params_shard_99.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.32.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.32.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.32.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.32.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "512df7dc1b276b61f37ef3bac8280d7d"
+        },
+        {
+            "dataPath": "params_shard_100.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.33.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "719c0fc160601c28d6c76450a2128138"
+        },
+        {
+            "dataPath": "params_shard_101.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.33.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "bd15501cda643976f06172001e960c39"
+        },
+        {
+            "dataPath": "params_shard_102.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.33.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.33.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.33.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.33.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "be7acf9053baf8c49e56c8e55e1c1738"
+        },
+        {
+            "dataPath": "params_shard_103.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.34.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "153a3facce91de97933288c2680c2142"
+        },
+        {
+            "dataPath": "params_shard_104.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.34.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "482b1807b922f621fd61b66aacc1a7b3"
+        },
+        {
+            "dataPath": "params_shard_105.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.34.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.34.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.34.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.34.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "341b2d4a6d2e1881ab41809feef8b216"
+        },
+        {
+            "dataPath": "params_shard_106.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.35.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "c9ae89585b805e137898ef5253ee89ba"
+        },
+        {
+            "dataPath": "params_shard_107.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.35.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "b2cbc54d8c57c040c213edfbeabe39a9"
+        },
+        {
+            "dataPath": "params_shard_108.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.35.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.35.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.35.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.35.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "8d0aba256987045e77c7cdd8acac137f"
+        },
+        {
+            "dataPath": "params_shard_109.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.36.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "448744ffb049f3450092df4b97c261e1"
+        },
+        {
+            "dataPath": "params_shard_110.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.36.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "7d5e853471f03df39ac9510d02fb6fc9"
+        },
+        {
+            "dataPath": "params_shard_111.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.36.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.36.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.36.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.36.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "755ea5a2e83ea85e906fe0d2ae77369b"
+        },
+        {
+            "dataPath": "params_shard_112.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.37.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "74498e95bdb634b758f6a51e1be48c6d"
+        },
+        {
+            "dataPath": "params_shard_113.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.37.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "cd8d7efa88c1534c956eb5048aa96416"
+        },
+        {
+            "dataPath": "params_shard_114.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.37.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.37.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.37.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.37.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "9160ad34620d2c1a21c3509a9b33492e"
+        },
+        {
+            "dataPath": "params_shard_115.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.38.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "31276f010e512251265db1ecc5314be4"
+        },
+        {
+            "dataPath": "params_shard_116.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.38.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "3bfd0dac94c404568339328931dda60b"
+        },
+        {
+            "dataPath": "params_shard_117.bin",
+            "format": "raw-shard",
+            "nbytes": 20078784,
+            "records": [
+                {
+                    "name": "model.layers.38.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.38.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.38.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.38.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                }
+            ],
+            "md5sum": "019691d999c4c68a6e9f5758ebd3a6d5"
+        },
+        {
+            "dataPath": "params_shard_118.bin",
+            "format": "raw-shard",
+            "nbytes": 36495360,
+            "records": [
+                {
+                    "name": "model.layers.39.mlp.gate_up_proj.weight",
+                    "shape": [
+                        11520,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 36495360,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "46ce83f72fb4071cb54c7649bd6b9cc5"
+        },
+        {
+            "dataPath": "params_shard_119.bin",
+            "format": "raw-shard",
+            "nbytes": 18247680,
+            "records": [
+                {
+                    "name": "model.layers.39.mlp.down_proj.weight",
+                    "shape": [
+                        1584,
+                        5760
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 18247680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "feebc8623ab88876de3eb2366dcfce7d"
+        },
+        {
+            "dataPath": "params_shard_120.bin",
+            "format": "raw-shard",
+            "nbytes": 388903680,
+            "records": [
+                {
+                    "name": "lm_head.weight",
+                    "shape": [
+                        122760,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 388903680,
+                    "byteOffset": 0
+                }
+            ],
+            "md5sum": "02a95b3956cdba027d5589b5d9a8114b"
+        },
+        {
+            "dataPath": "params_shard_121.bin",
+            "format": "raw-shard",
+            "nbytes": 20081952,
+            "records": [
+                {
+                    "name": "model.layers.39.self_attn.wqkv_pack.weight",
+                    "shape": [
+                        4752,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 15054336,
+                    "byteOffset": 0
+                },
+                {
+                    "name": "model.layers.39.self_attn.o_proj.weight",
+                    "shape": [
+                        1584,
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 5018112,
+                    "byteOffset": 15054336
+                },
+                {
+                    "name": "model.layers.39.input_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20072448
+                },
+                {
+                    "name": "model.layers.39.post_attention_layernorm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20075616
+                },
+                {
+                    "name": "model.norm.weight",
+                    "shape": [
+                        1584
+                    ],
+                    "dtype": "float16",
+                    "format": "f32-to-bf16",
+                    "nbytes": 3168,
+                    "byteOffset": 20078784
+                }
+            ],
+            "md5sum": "8c76431e7a928bfa50d6f30a2287b4ca"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/params_shard_0.bin b/params_shard_0.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b370f2e79f8f9f2e4da5dc8746a8dbd0efe8b31d
--- /dev/null
+++ b/params_shard_0.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb15a62e3c1f3e2af46144f84b7ccb70329f310a668837f51a160801fc0d4bbe
+size 388903680
diff --git a/params_shard_1.bin b/params_shard_1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cc23c80115026fb8ccd086eec52da0e6098ce3b7
--- /dev/null
+++ b/params_shard_1.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d164162c019e2724448d0b64898868cb3a787f16351b8a1fae0004bfa4673880
+size 36495360
diff --git a/params_shard_10.bin b/params_shard_10.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8cbdbe4eff73afab2e8f26be42d4820068825231
--- /dev/null
+++ b/params_shard_10.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff8a3f949db905ec678515ade82353253495e5d7a3e771e25bc808eb0ce1a76e
+size 36495360
diff --git a/params_shard_11.bin b/params_shard_11.bin
new file mode 100644
index 0000000000000000000000000000000000000000..994c9ae23d1723a7aa00a1c0af5258712df21f0b
--- /dev/null
+++ b/params_shard_11.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18181188e0b68c55312b8120530d5f03b0d92ebfba5785d717021bb50414fdc8
+size 18247680
diff --git a/params_shard_12.bin b/params_shard_12.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a5c81aea52e38179b8823cb9685432f291910c70
--- /dev/null
+++ b/params_shard_12.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2f4c4a89bdcd95414bd4ec200b7a4004d6392d566575ca1bbaf950cf3790764
+size 20078784
diff --git a/params_shard_13.bin b/params_shard_13.bin
new file mode 100644
index 0000000000000000000000000000000000000000..91d86188502ca18e53a95e8e4a31832fce75508c
--- /dev/null
+++ b/params_shard_13.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbfc839748491ec01fece6f3f8e2624877dd9aea96dab2cedbb77d4830957c92
+size 36495360
diff --git a/params_shard_14.bin b/params_shard_14.bin
new file mode 100644
index 0000000000000000000000000000000000000000..990201d2d0c88945c3636b362c71dc62de387b8d
--- /dev/null
+++ b/params_shard_14.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64c9076f79b3421bd06b9fa6da07b9a893db512efec624c9c3f4806810c9585c
+size 18247680
diff --git a/params_shard_15.bin b/params_shard_15.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cd21b37d6caef951a52758a1a3085992cc784fbd
--- /dev/null
+++ b/params_shard_15.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05d5e5fe77135b3294c5e94576a8a5cc27516954f40a8ae1dc05b72a6b8a0986
+size 20078784
diff --git a/params_shard_16.bin b/params_shard_16.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5ad2f6149f6ebf2b816c3896cc6a104ea4d826c4
--- /dev/null
+++ b/params_shard_16.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08d05efcdc38e1ac5635b62ffc52ad49dbc411c553c8fb09b0509b0026c45420
+size 36495360
diff --git a/params_shard_17.bin b/params_shard_17.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f28ffd3d249d4ab96149cc5a95c0598bf062df3a
--- /dev/null
+++ b/params_shard_17.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b264eb8df96ee51ab239e3fedc80fd99e724389b061cff4da59583bba1607ae
+size 18247680
diff --git a/params_shard_18.bin b/params_shard_18.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2a53441776f2941d76ea74bfa58462543fd92084
--- /dev/null
+++ b/params_shard_18.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a805b692ecb604a5110c3dd9cc7a5688e8bb33e7a2b34430c09da1a6239dafa8
+size 20078784
diff --git a/params_shard_19.bin b/params_shard_19.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8a74cc303ea470f6d65e047cae724631f450c16
--- /dev/null
+++ b/params_shard_19.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99b6f07e741cb5ba899c39631047b6bedcb58e3457dc0692c6b7112159c1ac3d
+size 36495360
diff --git a/params_shard_2.bin b/params_shard_2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fd348db408bd61e1aebbbb02aef600fe87155934
--- /dev/null
+++ b/params_shard_2.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5b071e815b459e5b520e1e57b6d4bf7b0b8630cc468ddcd9fe86f7f84d4e9cd
+size 18247680
diff --git a/params_shard_20.bin b/params_shard_20.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a7d562f7d2ca14d9f442308094a877db046287ac
--- /dev/null
+++ b/params_shard_20.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6102b7904f28db8b550a89fe0501023fb23e79eafd665e383e97855617b0fc1b
+size 18247680
diff --git a/params_shard_21.bin b/params_shard_21.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ac5f413b6bf482504ab5082a2c9dafe4d2f95e4c
--- /dev/null
+++ b/params_shard_21.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec1ba7f51981c31436a1933396eab1b1ba57ac57b251b002d2df0bf4ae089848
+size 20078784
diff --git a/params_shard_22.bin b/params_shard_22.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8ebda93a4f993551dc5b827b504c5b57bfb38204
--- /dev/null
+++ b/params_shard_22.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b84bdbca0e2bae40d29b7286b75ce3d53f1bd592f9b88bf502d9d3f6eb7ad1a6
+size 36495360
diff --git a/params_shard_23.bin b/params_shard_23.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4376fe5afe9efd39fcab8cdd1755af72d057c933
--- /dev/null
+++ b/params_shard_23.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef81265bee5067c7628781dfbb8c7b17fe9f1263492487ceb7dea7c76176e78f
+size 18247680
diff --git a/params_shard_24.bin b/params_shard_24.bin
new file mode 100644
index 0000000000000000000000000000000000000000..562074b4890aa4e0f03135d5b8395996e2692e50
--- /dev/null
+++ b/params_shard_24.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea2a625a90d283b1456aee4db9db0a3482b1bb693b42c47328cac7867258bc4b
+size 20078784
diff --git a/params_shard_25.bin b/params_shard_25.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4579a67e7ed66a08fc89cca7bb5325845263fc21
--- /dev/null
+++ b/params_shard_25.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70c74de0cd089ef93794cb7533941755f82f58133f9b46e9603e9b2995f53ced
+size 36495360
diff --git a/params_shard_26.bin b/params_shard_26.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2564b13e22e07f432ee37b1cadbf90482cf0ea60
--- /dev/null
+++ b/params_shard_26.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1783ef1268fa4844b204d6c2a3ce00af487c13d1b937fb51e80c81589f1cdb2d
+size 18247680
diff --git a/params_shard_27.bin b/params_shard_27.bin
new file mode 100644
index 0000000000000000000000000000000000000000..425470e2d6e2bc2f73632114ac341ad02efcf268
--- /dev/null
+++ b/params_shard_27.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:027f59762159a17d7882db3c0a2e5004da20ce123c8a43c846574bfb861975fd
+size 20078784
diff --git a/params_shard_28.bin b/params_shard_28.bin
new file mode 100644
index 0000000000000000000000000000000000000000..badf1b7f20c584a5a19c13ea52be1ae408a904c0
--- /dev/null
+++ b/params_shard_28.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b032ecf3379886174d5d61e9121a73d5526d4bf7fabf623564613db3a61b845
+size 36495360
diff --git a/params_shard_29.bin b/params_shard_29.bin
new file mode 100644
index 0000000000000000000000000000000000000000..df1e78069c9c3b6ef980a0878dce0a8647945c04
--- /dev/null
+++ b/params_shard_29.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba180004b70c968fe7cf84d238bc9c3c5e6919f104370174b4c9d4d3a6f48126
+size 18247680
diff --git a/params_shard_3.bin b/params_shard_3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..26586291f0919fcaca64af71eb55fe4c1867d501
--- /dev/null
+++ b/params_shard_3.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a63ecec2bbc479de9216cf2e396d7f80427be10fddc80a3ace06597ab233725b
+size 20078784
diff --git a/params_shard_30.bin b/params_shard_30.bin
new file mode 100644
index 0000000000000000000000000000000000000000..23c860c6a7b6a8dad9bbe2f3f468ff6f74bcf0b7
--- /dev/null
+++ b/params_shard_30.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cc9e7dd811f8fe2d30ffd8942c0dc91355ae0bdbab7807c15424d1943c1f360
+size 20078784
diff --git a/params_shard_31.bin b/params_shard_31.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4661d1ee51566939fdf50ca934c0eb6e5720e76b
--- /dev/null
+++ b/params_shard_31.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd80ffeac6f575f85faabb032f231d63edad351c14906df2b6a82f830c67a253
+size 36495360
diff --git a/params_shard_32.bin b/params_shard_32.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c2f545037354a5c7a766ab6c407931d2eeb20c5
--- /dev/null
+++ b/params_shard_32.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4acac732b3d3aae91cc8257b21c07cb3f1505d0b75daa9e1901fb1ac1ca38b87
+size 18247680
diff --git a/params_shard_33.bin b/params_shard_33.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2abdbd2456d0897f4f46b19808af296fdafe09e2
--- /dev/null
+++ b/params_shard_33.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:814b64ca2417ed08f2f31295cb3f65909b69f086d29a65e2c2abaf1ebffbd1c1
+size 20078784
diff --git a/params_shard_34.bin b/params_shard_34.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7811c7d76069b79cf28a6d01e200934330ae35b6
--- /dev/null
+++ b/params_shard_34.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db0aef5fa72a8c73555b70690703c85dcc28163483f31721553b2db59b9c59f5
+size 36495360
diff --git a/params_shard_35.bin b/params_shard_35.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b4b240bcd48e93ab54feb0d9bb895ab0ffc55617
--- /dev/null
+++ b/params_shard_35.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d54b5f7ccb6d0c43c66abec297e25cd92503faf25cc0b7c33aeae60538e43761
+size 18247680
diff --git a/params_shard_36.bin b/params_shard_36.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2e25624eb4b3304df76f67675899e4949adc9675
--- /dev/null
+++ b/params_shard_36.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6ca7d8a551dd2c4990cf2743f4d89217fa6e501e3d2927bc378ef0130f64f6b
+size 20078784
diff --git a/params_shard_37.bin b/params_shard_37.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2d46e385ac50442735a489d643f214badcf63af2
--- /dev/null
+++ b/params_shard_37.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c52357313b2697abdde0c5a881172f4bef773e60b17a73f36f5e41d3c4262ddc
+size 36495360
diff --git a/params_shard_38.bin b/params_shard_38.bin
new file mode 100644
index 0000000000000000000000000000000000000000..28cd81d90aba047df4da62390f912c297adbf226
--- /dev/null
+++ b/params_shard_38.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f61780d5f9a885bdfdc19cd03fc9bdfc1689c2f40331dfaaa39af37f2f13c041
+size 18247680
diff --git a/params_shard_39.bin b/params_shard_39.bin
new file mode 100644
index 0000000000000000000000000000000000000000..213b8e0a11b1ebbf0f6495887b32a214df67f092
--- /dev/null
+++ b/params_shard_39.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7291c1901cb4903c808bc9a26bdf96adbd8eff0eb2fe5d14f189bb3983a9d7e2
+size 20078784
diff --git a/params_shard_4.bin b/params_shard_4.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d22183b2a2353fac1725ec378f8762a3b8c5f2ee
--- /dev/null
+++ b/params_shard_4.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4467db9edfc706f90cbf2021031bc7d53c9198a9bb68ce99a6301165552115c
+size 36495360
diff --git a/params_shard_40.bin b/params_shard_40.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5a105e2c09954079bb620dca44d3bcb87e08b57b
--- /dev/null
+++ b/params_shard_40.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2dd1023f79b314263b4e7dcbf2730a9a468ee2b1476f18ad57718d7a295b915f
+size 36495360
diff --git a/params_shard_41.bin b/params_shard_41.bin
new file mode 100644
index 0000000000000000000000000000000000000000..956e85a7a5a5359457e24733098cff49775559ac
--- /dev/null
+++ b/params_shard_41.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c23372851d5cbc6b7151c207172883ea1fd38cfd39422ef5c46bf315c03507af
+size 18247680
diff --git a/params_shard_42.bin b/params_shard_42.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5c7b35787d5cd4be4d3a7c297b31f9daf71fd664
--- /dev/null
+++ b/params_shard_42.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f607b5c7b60fe213c449c5fafdbd5d59ce244f4652bee66c9a8d10db011164b
+size 20078784
diff --git a/params_shard_43.bin b/params_shard_43.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c8e085a02535bd1f13437e2f8d4be961eff2b3bf
--- /dev/null
+++ b/params_shard_43.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afd07af070213294b2ec9fee620e67bf5de6c2663a300a75d7b26b510eea10eb
+size 36495360
diff --git a/params_shard_44.bin b/params_shard_44.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bca3f2974b2136dc6cea76f82a43b15b1b394358
--- /dev/null
+++ b/params_shard_44.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:352a18c944c902be676d7143da097f4168696996a69872464c7208a91735068f
+size 18247680
diff --git a/params_shard_45.bin b/params_shard_45.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bb63edbd775f1ed8ded9a39f87b918a74c552976
--- /dev/null
+++ b/params_shard_45.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12ad84421a782f54b229c9a0a75a28269ddf627c7a52417a6811feba2dea0577
+size 20078784
diff --git a/params_shard_46.bin b/params_shard_46.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a02ad60a0867d09e08f0e78a4f28542c9b2d29db
--- /dev/null
+++ b/params_shard_46.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ce3ad630ab2f484738ca2a57f0846cc4f47a43daaaa11a8e83b23e173f1be82
+size 36495360
diff --git a/params_shard_47.bin b/params_shard_47.bin
new file mode 100644
index 0000000000000000000000000000000000000000..24590c8fe798b56e9bf71b13cc4fd54d40bb78d0
--- /dev/null
+++ b/params_shard_47.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00c431dd8b15973b1c751b6e248df55df7e0a1fccacb343afa4e7ac9e1fb6a90
+size 18247680
diff --git a/params_shard_48.bin b/params_shard_48.bin
new file mode 100644
index 0000000000000000000000000000000000000000..263f0d9a394929cd774b950f69f5ddb084452375
--- /dev/null
+++ b/params_shard_48.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa5b3aef019ca641df2b2aeecae2e1ab2581a2bebf1ade9ebe46365022520c32
+size 20078784
diff --git a/params_shard_49.bin b/params_shard_49.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8986246b1c75ecdc9437d4b9c68627d94e89924a
--- /dev/null
+++ b/params_shard_49.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ed63c0b7d4f1428a950e55c610ca5a2d3a61ebc368a51ac9c920138ff9e06d7
+size 36495360
diff --git a/params_shard_5.bin b/params_shard_5.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5aafbf52c9523cbc5e6800326631a7ccb3b3456e
--- /dev/null
+++ b/params_shard_5.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49880d42de589490b34179ece388c50c6cfd8bae74d78c5b2199597d7326a345
+size 18247680
diff --git a/params_shard_50.bin b/params_shard_50.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2e260726ba9bd5c071799ee25cc56cc1778b6c56
--- /dev/null
+++ b/params_shard_50.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26320a1492ec0537f3f9fbf1a09436217b30e4d73074257505c35e7ba57112c4
+size 18247680
diff --git a/params_shard_6.bin b/params_shard_6.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fb8e0e5a25c58325f1accfb073e6a78554e6a436
--- /dev/null
+++ b/params_shard_6.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1adcef26680c263543eaf0c18f8ca5a02dca685296e2c231e265da0272e8830
+size 20078784
diff --git a/params_shard_7.bin b/params_shard_7.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ba86f5a81d1c03cd35884e7dfe842ff9976f7f49
--- /dev/null
+++ b/params_shard_7.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac040d4c70c98f9bc45c04c66c1367317c4bffb49a9e1835888cb392042dfc84
+size 36495360
diff --git a/params_shard_8.bin b/params_shard_8.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a98f821840282a3def5eb90106ac84183eb16cad
--- /dev/null
+++ b/params_shard_8.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66a126194a4dd7bd820d21fb04455c5cb2b12194a0c7533c0b27e6ded72e86fe
+size 18247680
diff --git a/params_shard_9.bin b/params_shard_9.bin
new file mode 100644
index 0000000000000000000000000000000000000000..be28a6cfe9ed56843a5957a68dc318b472d8143a
--- /dev/null
+++ b/params_shard_9.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:476455ab2be13b20d9716c2a3611a5e1518a7152a9f9a9b8cd6a24814bec653b
+size 20078784