TheBloke committed on
Commit
f802737
1 Parent(s): 2d4454a

GPTQ model commit

Browse files
config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_name_or_path": "/workspace/process/undi95_mixtral-8x7b-moe-rp-story/source",
3
  "architectures": [
4
- "MistralForCausalLM"
5
  ],
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 1,
@@ -20,52 +20,73 @@
20
  "output_router_logits": false,
21
  "pad_token_id": 0,
22
  "pretraining_tp": 1,
23
- "rms_norm_eps": 1e-05,
24
- "rope_theta": 10000.0,
25
- "router_aux_loss_coef": 0.001,
26
- "sliding_window": 4096,
27
- "tie_word_embeddings": false,
28
- "torch_dtype": "bfloat16",
29
- "transformers_version": "4.37.0.dev0",
30
- "use_cache": true,
31
- "vocab_size": 32000,
32
  "quantization_config": {
 
33
  "bits": 4,
34
- "modules_in_block_to_quantize" :[
35
- ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
36
- ["self_attn.o_proj"],
37
- [
38
- "block_sparse_moe.experts.0.w1",
39
- "block_sparse_moe.experts.1.w1",
40
- "block_sparse_moe.experts.2.w1",
41
- "block_sparse_moe.experts.3.w1",
42
- "block_sparse_moe.experts.4.w1",
43
- "block_sparse_moe.experts.5.w1",
44
- "block_sparse_moe.experts.6.w1",
45
- "block_sparse_moe.experts.7.w1",
46
- "block_sparse_moe.experts.0.w3",
47
- "block_sparse_moe.experts.1.w3",
48
- "block_sparse_moe.experts.2.w3",
49
- "block_sparse_moe.experts.3.w3",
50
- "block_sparse_moe.experts.4.w3",
51
- "block_sparse_moe.experts.5.w3",
52
- "block_sparse_moe.experts.6.w3",
53
- "block_sparse_moe.experts.7.w3" ],
54
- [ "block_sparse_moe.experts.0.w2",
55
- "block_sparse_moe.experts.1.w2",
56
- "block_sparse_moe.experts.2.w2",
57
- "block_sparse_moe.experts.3.w2",
58
- "block_sparse_moe.experts.4.w2",
59
- "block_sparse_moe.experts.5.w2",
60
- "block_sparse_moe.experts.6.w2",
61
- "block_sparse_moe.experts.7.w2" ] ],
62
- "group_size": -1,
63
  "damp_percent": 0.1,
64
  "desc_act": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  "sym": true,
 
66
  "true_sequential": true,
67
- "model_name_or_path": null,
68
- "model_file_base_name": "model",
69
- "quant_method": "gptq"
70
- }
71
- }
 
 
 
 
 
 
 
 
 
1
  {
2
  "_name_or_path": "/workspace/process/undi95_mixtral-8x7b-moe-rp-story/source",
3
  "architectures": [
4
+ "MixtralForCausalLM"
5
  ],
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 1,
 
20
  "output_router_logits": false,
21
  "pad_token_id": 0,
22
  "pretraining_tp": 1,
 
 
 
 
 
 
 
 
 
23
  "quantization_config": {
24
+ "batch_size": 1,
25
  "bits": 4,
26
+ "block_name_to_quantize": null,
27
+ "cache_block_outputs": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "damp_percent": 0.1,
29
  "desc_act": true,
30
+ "exllama_config": {
31
+ "version": 1
32
+ },
33
+ "group_size": -1,
34
+ "max_input_length": null,
35
+ "model_seqlen": null,
36
+ "module_name_preceding_first_block": null,
37
+ "modules_in_block_to_quantize": [
38
+ [
39
+ "self_attn.k_proj",
40
+ "self_attn.v_proj",
41
+ "self_attn.q_proj"
42
+ ],
43
+ [
44
+ "self_attn.o_proj"
45
+ ],
46
+ [
47
+ "block_sparse_moe.experts.0.w1",
48
+ "block_sparse_moe.experts.1.w1",
49
+ "block_sparse_moe.experts.2.w1",
50
+ "block_sparse_moe.experts.3.w1",
51
+ "block_sparse_moe.experts.4.w1",
52
+ "block_sparse_moe.experts.5.w1",
53
+ "block_sparse_moe.experts.6.w1",
54
+ "block_sparse_moe.experts.7.w1",
55
+ "block_sparse_moe.experts.0.w3",
56
+ "block_sparse_moe.experts.1.w3",
57
+ "block_sparse_moe.experts.2.w3",
58
+ "block_sparse_moe.experts.3.w3",
59
+ "block_sparse_moe.experts.4.w3",
60
+ "block_sparse_moe.experts.5.w3",
61
+ "block_sparse_moe.experts.6.w3",
62
+ "block_sparse_moe.experts.7.w3"
63
+ ],
64
+ [
65
+ "block_sparse_moe.experts.0.w2",
66
+ "block_sparse_moe.experts.1.w2",
67
+ "block_sparse_moe.experts.2.w2",
68
+ "block_sparse_moe.experts.3.w2",
69
+ "block_sparse_moe.experts.4.w2",
70
+ "block_sparse_moe.experts.5.w2",
71
+ "block_sparse_moe.experts.6.w2",
72
+ "block_sparse_moe.experts.7.w2"
73
+ ]
74
+ ],
75
+ "pad_token_id": null,
76
+ "quant_method": "gptq",
77
  "sym": true,
78
+ "tokenizer": null,
79
  "true_sequential": true,
80
+ "use_cuda_fp16": false,
81
+ "use_exllama": true
82
+ },
83
+ "rms_norm_eps": 1e-05,
84
+ "rope_theta": 1000000.0,
85
+ "router_aux_loss_coef": 0.02,
86
+ "sliding_window": 4096,
87
+ "tie_word_embeddings": false,
88
+ "torch_dtype": "bfloat16",
89
+ "transformers_version": "4.37.0.dev0",
90
+ "use_cache": true,
91
+ "vocab_size": 32000
92
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.37.0.dev0"
7
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f9ef92b3d16e82e3de385999843d9cf49cd3a4e8454d2cac47273a3ce7924d6
3
- size 23811549344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:574743b06845d2b79545c257ecbe5932e885171ff956338d3cb9bdf49d8f1ae4
3
+ size 23811551008
quantize_config.json CHANGED
@@ -1,10 +1,8 @@
1
  {
2
- "bits": 4,
3
- "group_size": -1,
4
- "damp_percent": 0.1,
5
- "desc_act": true,
6
- "sym": true,
7
- "true_sequential": true,
8
- "model_name_or_path": null,
9
- "model_file_base_name": "model"
10
  }
 
1
  {
2
+ "bits": 4,
3
+ "group_size": -1,
4
+ "damp_percent": 0.1,
5
+ "desc_act": true,
6
+ "sym": true,
7
+ "true_sequential": true
 
 
8
  }