Upload pre-quantized weight files.
Browse files
- mistral7b_hf_tokenizer/config.json +24 -0
- mistral7b_hf_tokenizer/tokenizer.json +0 -0
- mistral7b_hf_tokenizer/tokenizer.model +3 -0
- mistral7b_hf_tokenizer/tokenizer_config.json +43 -0
- mistral_kv_int8_scales/model.layers.0.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.1.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.10.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.11.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.12.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.13.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.14.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.15.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.16.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.17.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.18.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.19.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.2.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.20.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.21.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.22.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.23.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.24.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.25.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.26.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.27.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.28.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.29.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.3.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.30.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.31.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.4.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.5.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.6.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.7.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.8.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_kv_int8_scales/model.layers.9.attention.query_key_value.scale_y_quant_orig.bin +3 -0
- mistral_tp1.json +1 -0
- mistral_tp1_rank0.npz +3 -0
mistral7b_hf_tokenizer/config.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"MistralForCausalLM"
|
4 |
+
],
|
5 |
+
"bos_token_id": 1,
|
6 |
+
"eos_token_id": 2,
|
7 |
+
"hidden_act": "silu",
|
8 |
+
"hidden_size": 4096,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 14336,
|
11 |
+
"max_position_embeddings": 32768,
|
12 |
+
"model_type": "mistral",
|
13 |
+
"num_attention_heads": 32,
|
14 |
+
"num_hidden_layers": 32,
|
15 |
+
"num_key_value_heads": 8,
|
16 |
+
"rms_norm_eps": 1e-05,
|
17 |
+
"rope_theta": 10000.0,
|
18 |
+
"sliding_window": 4096,
|
19 |
+
"tie_word_embeddings": false,
|
20 |
+
"torch_dtype": "bfloat16",
|
21 |
+
"transformers_version": "4.34.0.dev0",
|
22 |
+
"use_cache": true,
|
23 |
+
"vocab_size": 32000
|
24 |
+
}
|
mistral7b_hf_tokenizer/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
mistral7b_hf_tokenizer/tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
|
3 |
+
size 493443
|
mistral7b_hf_tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"additional_special_tokens": [],
|
31 |
+
"bos_token": "<s>",
|
32 |
+
"chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
|
33 |
+
"clean_up_tokenization_spaces": false,
|
34 |
+
"eos_token": "</s>",
|
35 |
+
"legacy": true,
|
36 |
+
"model_max_length": 1000000000000000019884624838656,
|
37 |
+
"pad_token": null,
|
38 |
+
"sp_model_kwargs": {},
|
39 |
+
"spaces_between_special_tokens": false,
|
40 |
+
"tokenizer_class": "LlamaTokenizer",
|
41 |
+
"unk_token": "<unk>",
|
42 |
+
"use_default_system_prompt": false
|
43 |
+
}
|
mistral_kv_int8_scales/model.layers.0.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bc44fc2c456b059d990917917da72dd109976a923dc288ae4ac619dc4532954
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.1.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b6d565de118af220b4df4d235cabd7966f4979277aad42ab2e6233818d32315
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.10.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73b49724f57dead0e75e7260ecab4fafbf9beae569b891c7735c76a9d770966f
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.11.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0afe78cd6e62ee5582fcc51d26b08b47cdc7fcd49c29985c9f93e647380f17cf
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.12.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fa39ab9dc429f604f0b38adeb6b24b53ad538f5e41e575c08e751284a5cb88f
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.13.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80bdbf53669062b68b56889fbacd864a679e79ad3ef9acae73aa7ecac05be812
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.14.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a48548451e4397cec752b6c52e6db1d944f40059e7175fb1a117f4486dc5fe3
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.15.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:727854598f7fd3dd59bdc21944ad0b8af92d4db05055ba3c11e39b0b1a447698
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.16.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdd5d05501ee04109505678c4c15dc3cfd1d061ca03455b090d5ba8a9a8e9459
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.17.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f9272daf1ee10098346ca48e69da6d4070af90ecb9d5d681b0a7979c293f547
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.18.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bde49682090fe53da56339249648ef6cfc15671d98a4e85bd7e78ee8ae9c53f
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.19.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2e1aa91d2a28b5af44c25841f53028e5f726f0c26bde790795dca7bdf4c0f7d
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.2.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad1e79869ecf5664ab6538eb4385798c617c1c5a432cef6953cb9073e3b353e5
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.20.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:867395ba08a1e01107c4d697a9eb91ed29f616910e2ba18afb54ea91bd5e582d
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.21.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa8a8445c41a3e2572df89548bef055d40a125f668bd227852d22721dd882816
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.22.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35a51c4f50f10a5a86ab207abfc1298382ce24ababd7b5b5701f50cb237888f8
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.23.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15a07322cdc0e38ab40eeb7141b4f48ac293ebc361e945bca214f72a33d67899
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.24.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:691ecc7795e93dc213c786ee59a4462735e869870007fb4f656e1eb8d7acabdf
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.25.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13d444885f17085b8d4f58b949b605af0cd692e6f69ce5d44a0aa8a74f27b3f0
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.26.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b5bdb0df8798ccbbcf56e9e8c5c3cf45b2688ee5216549904db12cbe9aacd24
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.27.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fe25012a695e68ced4d94c906a480d2b08caed618bbc706518205de2906890b
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.28.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0afe78cd6e62ee5582fcc51d26b08b47cdc7fcd49c29985c9f93e647380f17cf
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.29.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cf70ecb1f4b406332d5d82e3502abcbeb6a0ea597064021a8dac3d5408da6c0
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.3.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea546aba6d258706be41571e9a103ae31bd3ffa14eb6312e781823240b13e412
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.30.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee1fadea8f44339ec9ff0b4c838f1e985404dd67e1bfbd9f53650a4c92a46ada
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.31.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0026a0b3dc7f8556c0e370e862617d5a80f3ad0428526531ac8dfbfbb8f1ccba
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.4.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6db7167010a4526848105812d338f8217e9b8fd12d9b80f8749308f19983bcb
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.5.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bacefe3e82fa8a018f6a36c37b46fa77fb9b57fea346cfbb4f1bcf706b09180
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.6.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b6907325a655ee634e411c3d4431705e12bde53e7f0d63c5ddccffcfaa4a259
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.7.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15a07322cdc0e38ab40eeb7141b4f48ac293ebc361e945bca214f72a33d67899
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.8.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b52e5265f7c06c2bd4a0ec653672c39a55d1fa08fbf56096780709ee273346d
|
3 |
+
size 4
|
mistral_kv_int8_scales/model.layers.9.attention.query_key_value.scale_y_quant_orig.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bf6fdeab9aef1cd59b6f4383538f705d3a30cd0ee457941c25b2f065944b164
|
3 |
+
size 4
|
mistral_tp1.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"version": 0.4, "quantization": "int4_awq", "awq_block_size": 128, "dtype": "float16", "vocab_size": 32000, "rank": 0, "tensor_parallel": 1, "vocab_embedding": {"weight": "_np:vocab_embedding:weight"}, "positional_embedding": null, "layers": [{"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:0:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:0:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:0:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": 
{"linear_type": "column", "weight": "_np:layers:0:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:0:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:0:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:1:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:1:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:1:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:1:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:1:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:1:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:1:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, 
"num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:2:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:2:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:2:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:2:mlp:fc:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:2:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:2:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:3:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:3:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:3:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": 
{"linear_type": "column", "weight": "_np:layers:3:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:3:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:3:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:3:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:3:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:3:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": 
false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:4:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:4:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:4:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:4:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:4:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:4:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:4:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:5:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, 
"weights_scaling_factor": "_np:layers:5:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:5:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:5:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:5:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:5:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:5:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": 
"_np:layers:6:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:6:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:6:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:6:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, 
"awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:6:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:6:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:7:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:v:weights_scaling_factor", 
"prequant_scaling_factor": "_np:layers:7:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:7:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:7:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:7:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:7:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:7:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:8:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, 
"attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:8:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:8:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:8:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:8:mlp:gate:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:8:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:9:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, 
"awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:9:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:9:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:9:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:9:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:9:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:10:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:q:weight", "bias": 
null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:10:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:10:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:10:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:10:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:gate:weights_scaling_factor", 
"prequant_scaling_factor": "_np:layers:10:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:10:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:11:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:11:attention:dense:weight", 
"bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:11:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:11:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:11:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:11:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:12:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:12:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:12:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:12:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:12:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:12:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:12:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:12:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:13:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:13:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:13:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:13:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:13:attention:dense:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:13:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:13:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:13:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:13:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:14:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:14:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:14:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:14:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:14:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:14:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:14:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:14:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:15:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:15:attention:dense:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:15:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:15:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:15:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:15:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:16:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:16:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:16:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:16:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:16:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:16:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:16:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:16:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:17:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:17:attention:dense:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:17:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:17:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:17:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:17:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:18:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:18:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:18:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:18:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:18:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:18:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:18:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:18:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:19:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:19:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:19:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:19:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:19:attention:dense:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:19:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:19:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:19:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:19:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:20:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:20:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:20:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:20:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:20:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:20:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:20:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:20:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:21:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:21:attention:dense:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:21:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:21:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:21:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:21:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:22:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:22:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:22:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:22:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:22:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:22:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:22:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:22:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:23:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:23:attention:dense:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:23:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:23:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:23:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:23:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:24:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:24:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:24:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:24:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:24:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:24:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:24:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:24:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:25:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:25:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:25:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:25:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:25:attention:dense:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:25:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:25:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:25:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:25:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:26:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:26:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:26:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:26:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:26:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:26:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:26:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:26:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:27:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:27:attention:dense:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:27:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:27:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:27:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:27:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:28:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:28:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:28:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:28:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:28:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:28:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:28:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:28:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:29:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:29:attention:dense:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:29:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:29:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:29:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:29:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:30:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:30:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:30:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:30:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:30:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:30:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:30:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:30:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:31:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:31:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:31:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:31:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:31:attention:dense:weight", "bias": null, 
"activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:31:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:31:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:31:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:31:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}], "final_layernorm": {"weight": "_np:final_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "lm_head": {"linear_type": "column", "weight": "_np:lm_head:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:lm_head:weights_scaling_factor", "prequant_scaling_factor": "_np:lm_head:prequant_scaling_factor", "output_scaling_factor": null, 
"awq_block_size": 128}, "share_embedding_table": false}
|
mistral_tp1_rank0.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:749d54b262dbb4d0e1cdcaaff23e323cf040b6f17f8fa8a668a8abad0f370721
|
3 |
+
size 14695822368
|