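# One-shot compression recipe: SmoothQuant, INT8 quantization, and SparseGPT
# pruning applied to a Llama-style decoder model in a single oneshot pass.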
test_stage:
  obcq_modifiers:
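    # SmoothQuant folds activation outliers into the preceding weights so the
    # activations are easier to quantize; each mapping pairs the projection
    # layers to smooth with the layernorm whose output feeds them.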
    SmoothQuantModifier:
      smoothing_strength: 0.5
      mappings: [
        [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
        [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
      ]
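    # INT8 quantization of the remaining modules; everything listed under
    # `ignore` is left at full precision.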
    QuantizationModifier:
      ignore:
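        # framework ops that are not quantized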
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
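        # matmul wrapper ops kept at full precision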
        - QuantizableMatMul
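        # individual MLP down projections excluded from quantization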
        - model.layers.21.mlp.down_proj
        - model.layers.7.mlp.down_proj
        - model.layers.2.mlp.down_proj
        - model.layers.20.mlp.down_proj
        - model.layers.19.mlp.down_proj
      post_oneshot_calibration: true
      scheme_overrides:
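        # embeddings: asymmetric INT8 weights, no activation quantization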
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
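    # SparseGPT one-shot pruning to 50% unstructured sparsity ("0:0" mask),
    # updating the decoder layers matched by `targets` sequentially and
    # folding quantization into the same pass.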
    SparseGPTModifier:
      sparsity: 0.5
      block_size: 128
      sequential_update: true
      quantize: true
      percdamp: 0.01
      mask_structure: "0:0"
      targets: ["re:model.layers.\\d*$"]