gstaff committed on
Commit 4128c4a
Parent: d186f8f

Upload pre-quantized weight files.

Files changed (38)
  1. mistral7b_hf_tokenizer/config.json +24 -0
  2. mistral7b_hf_tokenizer/tokenizer.json +0 -0
  3. mistral7b_hf_tokenizer/tokenizer.model +3 -0
  4. mistral7b_hf_tokenizer/tokenizer_config.json +43 -0
  5. mistral_kv_int8_scales/model.layers.0.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  6. mistral_kv_int8_scales/model.layers.1.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  7. mistral_kv_int8_scales/model.layers.10.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  8. mistral_kv_int8_scales/model.layers.11.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  9. mistral_kv_int8_scales/model.layers.12.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  10. mistral_kv_int8_scales/model.layers.13.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  11. mistral_kv_int8_scales/model.layers.14.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  12. mistral_kv_int8_scales/model.layers.15.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  13. mistral_kv_int8_scales/model.layers.16.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  14. mistral_kv_int8_scales/model.layers.17.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  15. mistral_kv_int8_scales/model.layers.18.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  16. mistral_kv_int8_scales/model.layers.19.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  17. mistral_kv_int8_scales/model.layers.2.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  18. mistral_kv_int8_scales/model.layers.20.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  19. mistral_kv_int8_scales/model.layers.21.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  20. mistral_kv_int8_scales/model.layers.22.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  21. mistral_kv_int8_scales/model.layers.23.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  22. mistral_kv_int8_scales/model.layers.24.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  23. mistral_kv_int8_scales/model.layers.25.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  24. mistral_kv_int8_scales/model.layers.26.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  25. mistral_kv_int8_scales/model.layers.27.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  26. mistral_kv_int8_scales/model.layers.28.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  27. mistral_kv_int8_scales/model.layers.29.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  28. mistral_kv_int8_scales/model.layers.3.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  29. mistral_kv_int8_scales/model.layers.30.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  30. mistral_kv_int8_scales/model.layers.31.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  31. mistral_kv_int8_scales/model.layers.4.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  32. mistral_kv_int8_scales/model.layers.5.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  33. mistral_kv_int8_scales/model.layers.6.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  34. mistral_kv_int8_scales/model.layers.7.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  35. mistral_kv_int8_scales/model.layers.8.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  36. mistral_kv_int8_scales/model.layers.9.attention.query_key_value.scale_y_quant_orig.bin +3 -0
  37. mistral_tp1.json +1 -0
  38. mistral_tp1_rank0.npz +3 -0
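
Taken together, the upload splits the checkpoint into three parts: the Hugging Face tokenizer assets under mistral7b_hf_tokenizer/, one 4-byte KV-cache INT8 scale per decoder layer under mistral_kv_int8_scales/, and the INT4-AWQ weight manifest (mistral_tp1.json plus the tensors in mistral_tp1_rank0.npz). A minimal sketch for fetching this exact commit with huggingface_hub follows; the repo_id is a placeholder, since the commit page does not show it.

```python
# Hypothetical sketch: download every file from this commit.
# REPO_ID is a placeholder -- substitute the actual repository name.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="<user>/<repo>",  # not shown on this commit page
    revision="4128c4a",       # the commit hash above
)
print(local_dir)
```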
mistral7b_hf_tokenizer/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "architectures": [
+ "MistralForCausalLM"
+ ],
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mistral",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 10000.0,
+ "sliding_window": 4096,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.34.0.dev0",
+ "use_cache": true,
+ "vocab_size": 32000
+ }
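
The attention geometry follows directly from these values: 4096 hidden units over 32 heads gives a head dimension of 128, and 32 query heads over 8 key/value heads means grouped-query attention with 4 query heads per KV head. A small sketch that reads the file and derives both numbers:

```python
import json

with open("mistral7b_hf_tokenizer/config.json") as f:
    cfg = json.load(f)

head_dim = cfg["hidden_size"] // cfg["num_attention_heads"]            # 4096 // 32 = 128
gqa_groups = cfg["num_attention_heads"] // cfg["num_key_value_heads"]  # 32 // 8 = 4
print(head_dim, gqa_groups)
```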
mistral7b_hf_tokenizer/tokenizer.json ADDED
The diff for this file is too large to render.
 
mistral7b_hf_tokenizer/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
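
Like the other binary files in this commit, tokenizer.model is checked in as a Git LFS pointer: three text lines giving the spec version, the SHA-256 of the real blob, and its size in bytes. A minimal sketch, with hypothetical paths, for checking a downloaded blob against its pointer:

```python
import hashlib

def parse_lfs_pointer(path):
    """Parse a Git LFS pointer file into a {key: value} dict."""
    with open(path) as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

def verify_blob(pointer_path, blob_path):
    """Assert the blob's size and SHA-256 match the pointer."""
    ptr = parse_lfs_pointer(pointer_path)
    data = open(blob_path, "rb").read()
    assert len(data) == int(ptr["size"]), "size mismatch"
    assert hashlib.sha256(data).hexdigest() == ptr["oid"].split(":", 1)[1], "hash mismatch"
```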
mistral7b_hf_tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
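
The chat_template above renders a conversation as bos_token followed by alternating [INST] ... [/INST] user turns and assistant replies terminated with eos_token, raising an exception if the roles do not alternate user/assistant. A sketch, assuming transformers >= 4.34 (the version recorded in config.json, and the release that introduced apply_chat_template):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistral7b_hf_tokenizer")
messages = [
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": "Hi there."},
    {"role": "user", "content": "How are you?"},
]
prompt = tok.apply_chat_template(messages, tokenize=False)
# -> "<s>[INST] Hello! [/INST]Hi there.</s> [INST] How are you? [/INST]"
print(prompt)
```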
mistral_kv_int8_scales/model.layers.0.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2bc44fc2c456b059d990917917da72dd109976a923dc288ae4ac619dc4532954
+ size 4
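
Every file in mistral_kv_int8_scales/ is exactly 4 bytes: a single scalar per layer for the fused query_key_value projection (the name scale_y_quant_orig suggests it maps quantized KV-cache values back to the original range). Reading it as one little-endian float32 is an assumption consistent with the size, not something the commit states. A sketch:

```python
import numpy as np

path = ("mistral_kv_int8_scales/"
        "model.layers.0.attention.query_key_value.scale_y_quant_orig.bin")
# Assumption: one little-endian float32 KV-cache dequantization scale.
scale = np.fromfile(path, dtype="<f4")
print(scale)  # shape (1,) float32 array
```

The remaining 31 files follow the same pattern, one per decoder layer.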
mistral_kv_int8_scales/model.layers.1.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b6d565de118af220b4df4d235cabd7966f4979277aad42ab2e6233818d32315
+ size 4
mistral_kv_int8_scales/model.layers.10.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:73b49724f57dead0e75e7260ecab4fafbf9beae569b891c7735c76a9d770966f
+ size 4
mistral_kv_int8_scales/model.layers.11.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0afe78cd6e62ee5582fcc51d26b08b47cdc7fcd49c29985c9f93e647380f17cf
+ size 4
mistral_kv_int8_scales/model.layers.12.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9fa39ab9dc429f604f0b38adeb6b24b53ad538f5e41e575c08e751284a5cb88f
+ size 4
mistral_kv_int8_scales/model.layers.13.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:80bdbf53669062b68b56889fbacd864a679e79ad3ef9acae73aa7ecac05be812
+ size 4
mistral_kv_int8_scales/model.layers.14.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a48548451e4397cec752b6c52e6db1d944f40059e7175fb1a117f4486dc5fe3
+ size 4
mistral_kv_int8_scales/model.layers.15.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:727854598f7fd3dd59bdc21944ad0b8af92d4db05055ba3c11e39b0b1a447698
+ size 4
mistral_kv_int8_scales/model.layers.16.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cdd5d05501ee04109505678c4c15dc3cfd1d061ca03455b090d5ba8a9a8e9459
+ size 4
mistral_kv_int8_scales/model.layers.17.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f9272daf1ee10098346ca48e69da6d4070af90ecb9d5d681b0a7979c293f547
+ size 4
mistral_kv_int8_scales/model.layers.18.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4bde49682090fe53da56339249648ef6cfc15671d98a4e85bd7e78ee8ae9c53f
+ size 4
mistral_kv_int8_scales/model.layers.19.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e2e1aa91d2a28b5af44c25841f53028e5f726f0c26bde790795dca7bdf4c0f7d
+ size 4
mistral_kv_int8_scales/model.layers.2.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ad1e79869ecf5664ab6538eb4385798c617c1c5a432cef6953cb9073e3b353e5
+ size 4
mistral_kv_int8_scales/model.layers.20.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:867395ba08a1e01107c4d697a9eb91ed29f616910e2ba18afb54ea91bd5e582d
+ size 4
mistral_kv_int8_scales/model.layers.21.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fa8a8445c41a3e2572df89548bef055d40a125f668bd227852d22721dd882816
+ size 4
mistral_kv_int8_scales/model.layers.22.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:35a51c4f50f10a5a86ab207abfc1298382ce24ababd7b5b5701f50cb237888f8
+ size 4
mistral_kv_int8_scales/model.layers.23.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:15a07322cdc0e38ab40eeb7141b4f48ac293ebc361e945bca214f72a33d67899
+ size 4
mistral_kv_int8_scales/model.layers.24.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:691ecc7795e93dc213c786ee59a4462735e869870007fb4f656e1eb8d7acabdf
+ size 4
mistral_kv_int8_scales/model.layers.25.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:13d444885f17085b8d4f58b949b605af0cd692e6f69ce5d44a0aa8a74f27b3f0
+ size 4
mistral_kv_int8_scales/model.layers.26.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b5bdb0df8798ccbbcf56e9e8c5c3cf45b2688ee5216549904db12cbe9aacd24
+ size 4
mistral_kv_int8_scales/model.layers.27.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0fe25012a695e68ced4d94c906a480d2b08caed618bbc706518205de2906890b
+ size 4
mistral_kv_int8_scales/model.layers.28.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0afe78cd6e62ee5582fcc51d26b08b47cdc7fcd49c29985c9f93e647380f17cf
+ size 4
mistral_kv_int8_scales/model.layers.29.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1cf70ecb1f4b406332d5d82e3502abcbeb6a0ea597064021a8dac3d5408da6c0
+ size 4
mistral_kv_int8_scales/model.layers.3.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea546aba6d258706be41571e9a103ae31bd3ffa14eb6312e781823240b13e412
+ size 4
mistral_kv_int8_scales/model.layers.30.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee1fadea8f44339ec9ff0b4c838f1e985404dd67e1bfbd9f53650a4c92a46ada
+ size 4
mistral_kv_int8_scales/model.layers.31.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0026a0b3dc7f8556c0e370e862617d5a80f3ad0428526531ac8dfbfbb8f1ccba
+ size 4
mistral_kv_int8_scales/model.layers.4.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e6db7167010a4526848105812d338f8217e9b8fd12d9b80f8749308f19983bcb
+ size 4
mistral_kv_int8_scales/model.layers.5.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2bacefe3e82fa8a018f6a36c37b46fa77fb9b57fea346cfbb4f1bcf706b09180
+ size 4
mistral_kv_int8_scales/model.layers.6.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b6907325a655ee634e411c3d4431705e12bde53e7f0d63c5ddccffcfaa4a259
+ size 4
mistral_kv_int8_scales/model.layers.7.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:15a07322cdc0e38ab40eeb7141b4f48ac293ebc361e945bca214f72a33d67899
+ size 4
mistral_kv_int8_scales/model.layers.8.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b52e5265f7c06c2bd4a0ec653672c39a55d1fa08fbf56096780709ee273346d
+ size 4
mistral_kv_int8_scales/model.layers.9.attention.query_key_value.scale_y_quant_orig.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9bf6fdeab9aef1cd59b6f4383538f705d3a30cd0ee457941c25b2f065944b164
+ size 4
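
The final two entries describe the INT4-AWQ weights themselves: mistral_tp1.json (shown next) is a tensor-parallel rank-0 manifest in which every "weight", "weights_scaling_factor", and "prequant_scaling_factor" entry is an "_np:..." string naming an array stored in mistral_tp1_rank0.npz. A sketch of resolving one such reference; the exact key format inside the npz is an assumption (it may or may not keep the "_np:" prefix), so the code tries both:

```python
import json
import numpy as np

# Python's json module accepts the bare -Infinity tokens this file
# contains ("rotary_dim": -Infinity); strict JSON parsers may not.
with open("mistral_tp1.json") as f:
    manifest = json.load(f)

arrays = np.load("mistral_tp1_rank0.npz")

def resolve(ref):
    """Map an "_np:..." reference to its npz array; key format is assumed."""
    for key in (ref, ref.removeprefix("_np:")):
        if key in arrays:
            return arrays[key]
    raise KeyError(ref)

w = resolve(manifest["layers"][0]["attention"]["qkv"]["q"]["weight"])
print(w.shape, w.dtype)
```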
mistral_tp1.json ADDED
@@ -0,0 +1 @@
+ {"version": 0.4, "quantization": "int4_awq", "awq_block_size": 128, "dtype": "float16", "vocab_size": 32000, "rank": 0, "tensor_parallel": 1, "vocab_embedding": {"weight": "_np:vocab_embedding:weight"}, "positional_embedding": null, "layers": [{"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:0:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:0:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:0:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:0:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:0:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:0:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:1:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:1:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:1:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:1:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:1:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:1:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:1:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:2:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:v:weight", 
"bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:2:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:2:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:2:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:2:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:2:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:3:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:3:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:3:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:3:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:3:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, 
"rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:3:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:3:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:3:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:3:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:4:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:4:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:4:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:4:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:4:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:4:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:4:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:5:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:5:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:5:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:5:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:5:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:5:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, 
"use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:6:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:6:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:6:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:6:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:6:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:6:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:7:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, 
"k": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:7:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:7:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:7:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:7:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:7:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:8:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:8:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:8:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:8:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:8:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:8:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:8:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:9:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:9:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:9:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": 
{"linear_type": "column", "weight": "_np:layers:9:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:9:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:9:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:10:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:10:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:10:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:10:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:10:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:mlp:gate:prequant_scaling_factor", 
"output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:10:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:11:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:11:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:11:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:11:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:11:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:11:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, 
{"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:12:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:12:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:12:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:12:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:12:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:12:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:13:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:13:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": 
"_np:layers:13:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:13:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:13:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:13:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:13:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:13:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:13:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:14:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:14:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:14:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:14:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:14:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:14:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:14:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:15:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:15:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:15:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 
1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:15:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:15:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:15:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:16:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:16:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:16:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:16:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:16:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:16:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:16:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:17:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:17:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:17:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:17:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:17:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:17:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": 
false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:18:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:18:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:18:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:18:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:18:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:18:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:19:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:19:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": 
"column", "weight": "_np:layers:19:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:19:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:19:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:19:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:19:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:19:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:19:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:20:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:20:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:20:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:20:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:20:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:20:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:20:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:21:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:21:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:21:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 
1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:21:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:21:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:21:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:22:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:22:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:22:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:22:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:22:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:22:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:22:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:23:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:23:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:23:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:23:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:23:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:23:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": 
false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:24:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:24:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:24:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:24:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:24:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:24:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:25:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:25:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": 
"column", "weight": "_np:layers:25:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:25:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:25:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:25:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:25:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:25:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:25:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:26:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:26:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:26:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:26:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:26:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:26:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:26:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:27:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:27:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:27:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 
1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:27:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:27:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:27:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:28:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:28:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:28:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:28:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:28:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:28:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:28:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:29:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:29:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:29:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:29:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:29:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:29:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": 
false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:30:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:30:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:30:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:30:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:30:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:30:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:31:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:31:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": 
"column", "weight": "_np:layers:31:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:31:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:31:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:31:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:31:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:31:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:31:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}], "final_layernorm": {"weight": "_np:final_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "lm_head": {"linear_type": "column", "weight": "_np:lm_head:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:lm_head:weights_scaling_factor", "prequant_scaling_factor": "_np:lm_head:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "share_embedding_table": false}
mistral_tp1_rank0.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:749d54b262dbb4d0e1cdcaaff23e323cf040b6f17f8fa8a668a8abad0f370721
+ size 14695822368
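The three `+` lines above are a Git LFS pointer, not the tensor data itself: the repo records only the sha256 oid and byte size, and the 14,695,822,368-byte archive is fetched from LFS storage on checkout. A short sketch of verifying a downloaded copy against that pointer (the local path is assumed; requires Python 3.9+ for `removeprefix`):

```python
import hashlib

POINTER = """\
version https://git-lfs.github.com/spec/v1
oid sha256:749d54b262dbb4d0e1cdcaaff23e323cf040b6f17f8fa8a668a8abad0f370721
size 14695822368
"""

def parse_lfs_pointer(text):
    """Split the 'key value' lines of an LFS pointer into (oid, size)."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return fields["oid"].removeprefix("sha256:"), int(fields["size"])

def verify(path, pointer_text, chunk_size=1 << 20):
    """Stream the file and check both its byte length and sha256 digest."""
    oid, size = parse_lfs_pointer(pointer_text)
    digest, seen = hashlib.sha256(), 0
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk_size), b""):
            digest.update(block)
            seen += len(block)
    return seen == size and digest.hexdigest() == oid

print(verify("mistral_tp1_rank0.npz", POINTER))  # True for an intact download
```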