{"tree_metadata": {"('transformer/embedder', 'input_embedding')": {"key_metadata": [{"key": "transformer/embedder", "key_type": 2}, {"key": "input_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/final_norm', 'scale')": {"key_metadata": [{"key": "transformer/final_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_0/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_1/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_10/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_11/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_12/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_13/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_14/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_15/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_16/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_17/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_18/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_19/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_2/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_20/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_21/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_22/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_23/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_24/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_25/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_3/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_4/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_5/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_6/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_7/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_8/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_9/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}}, "use_zarr3": false}