mgoin committed on
Commit
11d3311
1 Parent(s): 6e35316

Updated compression_config to quantization_config

Browse files
Files changed (1) hide show
  1. config.json +38 -38
config.json CHANGED
@@ -12,43 +12,6 @@
12
  },
13
  "aux_loss_alpha": 0.001,
14
  "bos_token_id": 100000,
15
- "compression_config": {
16
- "config_groups": {
17
- "group_0": {
18
- "input_activations": null,
19
- "output_activations": null,
20
- "targets": [
21
- "Linear"
22
- ],
23
- "weights": {
24
- "actorder": null,
25
- "block_structure": null,
26
- "dynamic": false,
27
- "group_size": null,
28
- "num_bits": 4,
29
- "observer": "minmax",
30
- "observer_kwargs": {},
31
- "strategy": "channel",
32
- "symmetric": true,
33
- "type": "int"
34
- }
35
- }
36
- },
37
- "format": "pack-quantized",
38
- "global_compression_ratio": 2.265805157986176,
39
- "ignore": [
40
- "lm_head"
41
- ],
42
- "kv_cache_scheme": null,
43
- "quant_method": "compressed-tensors",
44
- "quantization_status": "compressed",
45
- "sparsity_config": {
46
- "format": "dense",
47
- "global_sparsity": 0.21918901165186397,
48
- "registry_requires_subclass": false,
49
- "sparsity_structure": "unstructured"
50
- }
51
- },
52
  "eos_token_id": 100001,
53
  "ep_size": 1,
54
  "first_k_dense_replace": 1,
@@ -94,5 +57,42 @@
94
  "transformers_version": "4.44.2",
95
  "use_cache": true,
96
  "v_head_dim": 128,
97
- "vocab_size": 102400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  }
 
12
  },
13
  "aux_loss_alpha": 0.001,
14
  "bos_token_id": 100000,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "eos_token_id": 100001,
16
  "ep_size": 1,
17
  "first_k_dense_replace": 1,
 
57
  "transformers_version": "4.44.2",
58
  "use_cache": true,
59
  "v_head_dim": 128,
60
+ "vocab_size": 102400,
61
+ "quantization_config": {
62
+ "config_groups": {
63
+ "group_0": {
64
+ "input_activations": null,
65
+ "output_activations": null,
66
+ "targets": [
67
+ "Linear"
68
+ ],
69
+ "weights": {
70
+ "actorder": null,
71
+ "block_structure": null,
72
+ "dynamic": false,
73
+ "group_size": null,
74
+ "num_bits": 4,
75
+ "observer": "minmax",
76
+ "observer_kwargs": {},
77
+ "strategy": "channel",
78
+ "symmetric": true,
79
+ "type": "int"
80
+ }
81
+ }
82
+ },
83
+ "format": "pack-quantized",
84
+ "global_compression_ratio": 2.265805157986176,
85
+ "ignore": [
86
+ "lm_head"
87
+ ],
88
+ "kv_cache_scheme": null,
89
+ "quant_method": "compressed-tensors",
90
+ "quantization_status": "compressed",
91
+ "sparsity_config": {
92
+ "format": "dense",
93
+ "global_sparsity": 0.21918901165186397,
94
+ "registry_requires_subclass": false,
95
+ "sparsity_structure": "unstructured"
96
+ }
97
+ }
98
  }