gstaff committed on
Commit 4128c4a
Parent: d186f8f

Upload pre-quantized weight files.

Files changed (38)
  1. mistral7b_hf_tokenizer/config.json +24 -0
  2. mistral7b_hf_tokenizer/tokenizer.json +0 -0
  3. mistral7b_hf_tokenizer/tokenizer.model +3 -0
  4. mistral7b_hf_tokenizer/tokenizer_config.json +43 -0
  5. mistral_kv_int8_scales/model.layers.0.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  6. mistral_kv_int8_scales/model.layers.1.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  7. mistral_kv_int8_scales/model.layers.10.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  8. mistral_kv_int8_scales/model.layers.11.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  9. mistral_kv_int8_scales/model.layers.12.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  10. mistral_kv_int8_scales/model.layers.13.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  11. mistral_kv_int8_scales/model.layers.14.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  12. mistral_kv_int8_scales/model.layers.15.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  13. mistral_kv_int8_scales/model.layers.16.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  14. mistral_kv_int8_scales/model.layers.17.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  15. mistral_kv_int8_scales/model.layers.18.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  16. mistral_kv_int8_scales/model.layers.19.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  17. mistral_kv_int8_scales/model.layers.2.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  18. mistral_kv_int8_scales/model.layers.20.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  19. mistral_kv_int8_scales/model.layers.21.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  20. mistral_kv_int8_scales/model.layers.22.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  21. mistral_kv_int8_scales/model.layers.23.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  22. mistral_kv_int8_scales/model.layers.24.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  23. mistral_kv_int8_scales/model.layers.25.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  24. mistral_kv_int8_scales/model.layers.26.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  25. mistral_kv_int8_scales/model.layers.27.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  26. mistral_kv_int8_scales/model.layers.28.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  27. mistral_kv_int8_scales/model.layers.29.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  28. mistral_kv_int8_scales/model.layers.3.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  29. mistral_kv_int8_scales/model.layers.30.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  30. mistral_kv_int8_scales/model.layers.31.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  31. mistral_kv_int8_scales/model.layers.4.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  32. mistral_kv_int8_scales/model.layers.5.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  33. mistral_kv_int8_scales/model.layers.6.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  34. mistral_kv_int8_scales/model.layers.7.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  35. mistral_kv_int8_scales/model.layers.8.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  36. mistral_kv_int8_scales/model.layers.9.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  37. mistral_tp1.json +1 -0
  38. mistral_tp1_rank0.npz +3 -0
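
Taken together, the upload splits the checkpoint into three parts: the Hugging Face tokenizer assets under mistral7b_hf_tokenizer/, one 4-byte KV-cache INT8 scale per decoder layer under mistral_kv_int8_scales/, and the INT4-AWQ weight manifest (mistral_tp1.json plus the tensors in mistral_tp1_rank0.npz). A minimal sketch for fetching this exact commit with huggingface_hub follows; the repo_id is a placeholder, since the commit page does not show it.

```python
# Hypothetical sketch: download every file from this commit.
# REPO_ID is a placeholder -- substitute the actual repository name.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="<user>/<repo>",  # not shown on this commit page
    revision="4128c4a",       # the commit hash above
)
print(local_dir)
```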
mistral7b_hf_tokenizer/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "architectures": [
+ "MistralForCausalLM"
+ ],
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mistral",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 10000.0,
+ "sliding_window": 4096,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.34.0.dev0",
+ "use_cache": true,
+ "vocab_size": 32000
+ }
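
The attention geometry follows directly from these values: 4096 hidden units over 32 heads gives a head dimension of 128, and 32 query heads over 8 key/value heads means grouped-query attention with 4 query heads per KV head. A small sketch that reads the file and derives both numbers:

```python
import json

with open("mistral7b_hf_tokenizer/config.json") as f:
    cfg = json.load(f)

head_dim = cfg["hidden_size"] // cfg["num_attention_heads"]            # 4096 // 32 = 128
gqa_groups = cfg["num_attention_heads"] // cfg["num_key_value_heads"]  # 32 // 8 = 4
print(head_dim, gqa_groups)
```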
mistral7b_hf_tokenizer/tokenizer.json ADDED
The diff for this file is too large to render.
 
mistral7b_hf_tokenizer/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
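
Like the other binary files in this commit, tokenizer.model is checked in as a Git LFS pointer: three text lines giving the spec version, the SHA-256 of the real blob, and its size in bytes. A minimal sketch, with hypothetical paths, for checking a downloaded blob against its pointer:

```python
import hashlib

def parse_lfs_pointer(path):
    """Parse a Git LFS pointer file into a {key: value} dict."""
    with open(path) as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

def verify_blob(pointer_path, blob_path):
    """Assert the blob's size and SHA-256 match the pointer."""
    ptr = parse_lfs_pointer(pointer_path)
    data = open(blob_path, "rb").read()
    assert len(data) == int(ptr["size"]), "size mismatch"
    assert hashlib.sha256(data).hexdigest() == ptr["oid"].split(":", 1)[1], "hash mismatch"
```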
mistral7b_hf_tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
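
The chat_template above renders a conversation as bos_token followed by alternating [INST] ... [/INST] user turns and assistant replies terminated with eos_token, raising an exception if the roles do not alternate user/assistant. A sketch, assuming transformers >= 4.34 (the version recorded in config.json, and the release that introduced apply_chat_template):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistral7b_hf_tokenizer")
messages = [
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": "Hi there."},
    {"role": "user", "content": "How are you?"},
]
prompt = tok.apply_chat_template(messages, tokenize=False)
# -> "<s>[INST] Hello! [/INST]Hi there.</s> [INST] How are you? [/INST]"
print(prompt)
```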
mistral_kv_int8_scales/model.layers.0.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2bc44fc2c456b059d990917917da72dd109976a923dc288ae4ac619dc4532954
+ size 4
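
Every file in mistral_kv_int8_scales/ is exactly 4 bytes: a single scalar per layer for the fused query_key_value projection (the name scale_y_quant_orig suggests it maps quantized KV-cache values back to the original range). Reading it as one little-endian float32 is an assumption consistent with the size, not something the commit states. A sketch:

```python
import numpy as np

path = ("mistral_kv_int8_scales/"
        "model.layers.0.attention.query_key_value.scale_y_quant_orig.bin")
# Assumption: one little-endian float32 KV-cache dequantization scale.
scale = np.fromfile(path, dtype="<f4")
print(scale)  # shape (1,) float32 array
```

The remaining 31 files follow the same pattern, one per decoder layer.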
mistral_kv_int8_scales/model.layers.1.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b6d565de118af220b4df4d235cabd7966f4979277aad42ab2e6233818d32315
+ size 4
mistral_kv_int8_scales/model.layers.10.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:73b49724f57dead0e75e7260ecab4fafbf9beae569b891c7735c76a9d770966f
+ size 4
mistral_kv_int8_scales/model.layers.11.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0afe78cd6e62ee5582fcc51d26b08b47cdc7fcd49c29985c9f93e647380f17cf
+ size 4
mistral_kv_int8_scales/model.layers.12.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9fa39ab9dc429f604f0b38adeb6b24b53ad538f5e41e575c08e751284a5cb88f
+ size 4
mistral_kv_int8_scales/model.layers.13.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:80bdbf53669062b68b56889fbacd864a679e79ad3ef9acae73aa7ecac05be812
+ size 4
mistral_kv_int8_scales/model.layers.14.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a48548451e4397cec752b6c52e6db1d944f40059e7175fb1a117f4486dc5fe3
+ size 4
mistral_kv_int8_scales/model.layers.15.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:727854598f7fd3dd59bdc21944ad0b8af92d4db05055ba3c11e39b0b1a447698
+ size 4
mistral_kv_int8_scales/model.layers.16.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cdd5d05501ee04109505678c4c15dc3cfd1d061ca03455b090d5ba8a9a8e9459
+ size 4
mistral_kv_int8_scales/model.layers.17.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f9272daf1ee10098346ca48e69da6d4070af90ecb9d5d681b0a7979c293f547
+ size 4
mistral_kv_int8_scales/model.layers.18.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4bde49682090fe53da56339249648ef6cfc15671d98a4e85bd7e78ee8ae9c53f
+ size 4
mistral_kv_int8_scales/model.layers.19.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e2e1aa91d2a28b5af44c25841f53028e5f726f0c26bde790795dca7bdf4c0f7d
+ size 4
mistral_kv_int8_scales/model.layers.2.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ad1e79869ecf5664ab6538eb4385798c617c1c5a432cef6953cb9073e3b353e5
+ size 4
mistral_kv_int8_scales/model.layers.20.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:867395ba08a1e01107c4d697a9eb91ed29f616910e2ba18afb54ea91bd5e582d
+ size 4
mistral_kv_int8_scales/model.layers.21.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fa8a8445c41a3e2572df89548bef055d40a125f668bd227852d22721dd882816
+ size 4
mistral_kv_int8_scales/model.layers.22.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:35a51c4f50f10a5a86ab207abfc1298382ce24ababd7b5b5701f50cb237888f8
+ size 4
mistral_kv_int8_scales/model.layers.23.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:15a07322cdc0e38ab40eeb7141b4f48ac293ebc361e945bca214f72a33d67899
+ size 4
mistral_kv_int8_scales/model.layers.24.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:691ecc7795e93dc213c786ee59a4462735e869870007fb4f656e1eb8d7acabdf
+ size 4
mistral_kv_int8_scales/model.layers.25.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:13d444885f17085b8d4f58b949b605af0cd692e6f69ce5d44a0aa8a74f27b3f0
+ size 4
mistral_kv_int8_scales/model.layers.26.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b5bdb0df8798ccbbcf56e9e8c5c3cf45b2688ee5216549904db12cbe9aacd24
+ size 4
mistral_kv_int8_scales/model.layers.27.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0fe25012a695e68ced4d94c906a480d2b08caed618bbc706518205de2906890b
+ size 4
mistral_kv_int8_scales/model.layers.28.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0afe78cd6e62ee5582fcc51d26b08b47cdc7fcd49c29985c9f93e647380f17cf
+ size 4
mistral_kv_int8_scales/model.layers.29.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1cf70ecb1f4b406332d5d82e3502abcbeb6a0ea597064021a8dac3d5408da6c0
+ size 4
mistral_kv_int8_scales/model.layers.3.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea546aba6d258706be41571e9a103ae31bd3ffa14eb6312e781823240b13e412
+ size 4
mistral_kv_int8_scales/model.layers.30.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee1fadea8f44339ec9ff0b4c838f1e985404dd67e1bfbd9f53650a4c92a46ada
+ size 4
mistral_kv_int8_scales/model.layers.31.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0026a0b3dc7f8556c0e370e862617d5a80f3ad0428526531ac8dfbfbb8f1ccba
+ size 4
mistral_kv_int8_scales/model.layers.4.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e6db7167010a4526848105812d338f8217e9b8fd12d9b80f8749308f19983bcb
+ size 4
mistral_kv_int8_scales/model.layers.5.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2bacefe3e82fa8a018f6a36c37b46fa77fb9b57fea346cfbb4f1bcf706b09180
+ size 4
mistral_kv_int8_scales/model.layers.6.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b6907325a655ee634e411c3d4431705e12bde53e7f0d63c5ddccffcfaa4a259
+ size 4
mistral_kv_int8_scales/model.layers.7.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:15a07322cdc0e38ab40eeb7141b4f48ac293ebc361e945bca214f72a33d67899
+ size 4
mistral_kv_int8_scales/model.layers.8.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b52e5265f7c06c2bd4a0ec653672c39a55d1fa08fbf56096780709ee273346d
+ size 4
mistral_kv_int8_scales/model.layers.9.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9bf6fdeab9aef1cd59b6f4383538f705d3a30cd0ee457941c25b2f065944b164
+ size 4
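
The final two entries describe the INT4-AWQ weights themselves: mistral_tp1.json (shown next) is a tensor-parallel rank-0 manifest in which every "weight", "weights_scaling_factor", and "prequant_scaling_factor" entry is an "_np:..." string naming an array stored in mistral_tp1_rank0.npz. A sketch of resolving one such reference; the exact key format inside the npz is an assumption (it may or may not keep the "_np:" prefix), so the code tries both:

```python
import json
import numpy as np

# Python's json module accepts the bare -Infinity tokens this file
# contains ("rotary_dim": -Infinity); strict JSON parsers may not.
with open("mistral_tp1.json") as f:
    manifest = json.load(f)

arrays = np.load("mistral_tp1_rank0.npz")

def resolve(ref):
    """Map an "_np:..." reference to its npz array; key format is assumed."""
    for key in (ref, ref.removeprefix("_np:")):
        if key in arrays:
            return arrays[key]
    raise KeyError(ref)

w = resolve(manifest["layers"][0]["attention"]["qkv"]["q"]["weight"])
print(w.shape, w.dtype)
```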
mistral_tp1.json ADDED
@@ -0,0 +1 @@
+ {"version": 0.4, "quantization": "int4_awq", "awq_block_size": 128, "dtype": "float16", "vocab_size": 32000, "rank": 0, "tensor_parallel": 1, "vocab_embedding": {"weight": "_np:vocab_embedding:weight"}, "positional_embedding": null, "layers": [{"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:0:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:0:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:0:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:0:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:0:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:0:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:1:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:1:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:1:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:1:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:1:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:1:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:1:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:2:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:v:weight", 
"bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:2:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:2:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:2:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:2:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:2:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:3:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:3:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:3:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:3:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:3:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, 
"rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:3:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:3:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:3:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:3:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:4:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:4:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:4:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:4:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:4:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:4:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:4:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:5:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:5:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:5:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:5:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:5:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:5:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, 
"use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:6:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:6:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:6:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:6:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:6:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:6:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:7:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, 
"k": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:7:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:7:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:7:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:7:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:7:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:8:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:8:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:8:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:8:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:8:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:8:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:8:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:9:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:9:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:9:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": 
{"linear_type": "column", "weight": "_np:layers:9:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:9:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:9:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:10:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:10:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:10:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:10:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:10:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:mlp:gate:prequant_scaling_factor", 
"output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:10:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:11:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:11:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:11:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:11:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:11:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:11:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, 
{"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:12:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:12:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:12:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:12:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:12:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:12:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:13:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:13:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": 
"_np:layers:13:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:13:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:13:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:13:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:13:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:13:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:13:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:14:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:14:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:14:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:14:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:14:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:14:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:14:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:15:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:15:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:15:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 
1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:15:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:15:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:15:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:16:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:16:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:16:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:16:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:16:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:16:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:16:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:17:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:17:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:17:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:17:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:17:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:17:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": 
false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:18:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:18:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:18:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:18:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:18:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:18:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:19:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:19:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": 
"column", "weight": "_np:layers:19:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:19:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:19:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:19:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:19:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:19:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:19:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:20:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:20:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:20:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:20:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:20:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:20:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:20:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:21:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:21:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:21:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 
1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:21:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:21:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:21:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:22:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:22:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:22:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:22:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:22:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:22:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:22:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:23:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:23:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:23:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:23:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:23:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:23:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": 
false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:24:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:24:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:24:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:24:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:24:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:24:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:25:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:25:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": 
"column", "weight": "_np:layers:25:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:25:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:25:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:25:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:25:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:25:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:25:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:26:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:26:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:26:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:26:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:26:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:26:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:26:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:27:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:27:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:27:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 
1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:27:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:27:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:27:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:28:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:28:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:28:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:28:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:28:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:28:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:28:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:29:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:29:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:29:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:29:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:29:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:29:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": 
false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:30:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:30:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:30:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:30:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:30:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:30:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:31:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:31:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": 
"column", "weight": "_np:layers:31:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:31:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:31:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:31:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:31:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:31:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:31:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}], "final_layernorm": {"weight": "_np:final_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "lm_head": {"linear_type": "column", "weight": "_np:lm_head:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:lm_head:weights_scaling_factor", "prequant_scaling_factor": "_np:lm_head:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "share_embedding_table": false}
mistral_tp1_rank0.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:749d54b262dbb4d0e1cdcaaff23e323cf040b6f17f8fa8a668a8abad0f370721
+ size 14695822368
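The three `+` lines above are a Git LFS pointer, not the tensor data itself: the repo records only the sha256 oid and byte size, and the 14,695,822,368-byte archive is fetched from LFS storage on checkout. A short sketch of verifying a downloaded copy against that pointer (the local path is assumed; requires Python 3.9+ for `removeprefix`):

```python
import hashlib

POINTER = """\
version https://git-lfs.github.com/spec/v1
oid sha256:749d54b262dbb4d0e1cdcaaff23e323cf040b6f17f8fa8a668a8abad0f370721
size 14695822368
"""

def parse_lfs_pointer(text):
    """Split the 'key value' lines of an LFS pointer into (oid, size)."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return fields["oid"].removeprefix("sha256:"), int(fields["size"])

def verify(path, pointer_text, chunk_size=1 << 20):
    """Stream the file and check both its byte length and sha256 digest."""
    oid, size = parse_lfs_pointer(pointer_text)
    digest, seen = hashlib.sha256(), 0
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk_size), b""):
            digest.update(block)
            seen += len(block)
    return seen == size and digest.hexdigest() == oid

print(verify("mistral_tp1_rank0.npz", POINTER))  # True for an intact download
```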