TheBloke committed
Commit 2d4454a
1 Parent(s): cb9de2f

Update config.json for Transformers GPTQ support

Files changed (1): config.json (+29, -1)
config.json CHANGED

@@ -31,6 +31,34 @@
   "vocab_size": 32000,
   "quantization_config": {
     "bits": 4,
+    "modules_in_block_to_quantize": [
+      ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
+      ["self_attn.o_proj"],
+      [
+        "block_sparse_moe.experts.0.w1",
+        "block_sparse_moe.experts.1.w1",
+        "block_sparse_moe.experts.2.w1",
+        "block_sparse_moe.experts.3.w1",
+        "block_sparse_moe.experts.4.w1",
+        "block_sparse_moe.experts.5.w1",
+        "block_sparse_moe.experts.6.w1",
+        "block_sparse_moe.experts.7.w1",
+        "block_sparse_moe.experts.0.w3",
+        "block_sparse_moe.experts.1.w3",
+        "block_sparse_moe.experts.2.w3",
+        "block_sparse_moe.experts.3.w3",
+        "block_sparse_moe.experts.4.w3",
+        "block_sparse_moe.experts.5.w3",
+        "block_sparse_moe.experts.6.w3",
+        "block_sparse_moe.experts.7.w3" ],
+      [ "block_sparse_moe.experts.0.w2",
+        "block_sparse_moe.experts.1.w2",
+        "block_sparse_moe.experts.2.w2",
+        "block_sparse_moe.experts.3.w2",
+        "block_sparse_moe.experts.4.w2",
+        "block_sparse_moe.experts.5.w2",
+        "block_sparse_moe.experts.6.w2",
+        "block_sparse_moe.experts.7.w2" ] ],
     "group_size": -1,
     "damp_percent": 0.1,
     "desc_act": true,
@@ -40,4 +68,4 @@
     "model_file_base_name": "model",
     "quant_method": "gptq"
   }
-}
+}
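For context, "modules_in_block_to_quantize" maps to the field of the same name on Transformers' GPTQConfig: a list of lists naming the Linear modules inside each decoder block that carry GPTQ weights, grouped in the order they were quantized. The Mixtral-style block_sparse_moe expert projections (w1/w3/w2) listed here are presumably what the default per-block module detection misses, hence this commit. A minimal loading sketch follows; the repo id is a placeholder (not taken from this page), and it assumes optimum and auto-gptq are installed alongside a recent Transformers release:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Placeholder Hub id -- substitute the repository this commit belongs to.
    model_id = "TheBloke/<model-name>-GPTQ"

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # from_pretrained reads "quantization_config" from config.json, including
    # the new "modules_in_block_to_quantize" list, so no quantization
    # arguments are needed when loading a pre-quantized checkpoint.
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

    prompt = tokenizer("Hello, world", return_tensors="pt").to(model.device)
    print(tokenizer.decode(model.generate(**prompt, max_new_tokens=20)[0]))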