add quantized version
- .gitattributes +2 -0
- LOG_quant.log +336 -0
- config.json +26 -0
- generation_config.json +11 -0
- gptq_model-4bit-128g.bin +3 -0
- gptq_model-4bit-128g.safetensors +3 -0
- quantize_config.json +8 -0
- special_tokens_map.json +14 -0
- tokenizer.json +0 -0
- tokenizer_config.json +9 -0
.gitattributes
CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+gptq_model-4bit-128g.bin filter=lfs diff=lfs merge=lfs -text
+gptq_model-4bit-128g.safetensors filter=lfs diff=lfs merge=lfs -text
LOG_quant.log
ADDED
@@ -0,0 +1,336 @@
+05/07/2023 04:42:05 WARNING Found cached dataset parquet (/home/pszemraj/.cache/huggingface/datasets/OpenAssistant___parquet/OpenAssistant--oasst1-2960c57d7e52ab15/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+05/07/2023 04:42:06 WARNING No such comm: c8c073cce7994da5b454ed0300090049
+05/07/2023 04:42:06 WARNING No such comm: 1103c6a0950249ca863ebc8399fddfef
+05/07/2023 04:42:06 WARNING No such comm: 5c3ce017525f4406904695297ace8724
+05/07/2023 04:42:06 WARNING No such comm: c5ceaf44ed3942cdb730705e230f024b
+05/07/2023 04:42:06 WARNING No such comm: f953c7265b2248c98cc4dbe971b44f3d
+05/07/2023 04:42:06 WARNING No such comm: 687a131767524803a41093a1d84f4652
+05/07/2023 04:42:06 WARNING No such comm: 93293aa5cce946bc8c6aa6ee4d0eaeb1
+05/07/2023 04:42:06 WARNING No such comm: 637d46ef1d57406a817ef020d0c7bf06
+05/07/2023 04:42:06 WARNING No such comm: 494913a72a3b4802b2390b58f38a3a36
+05/07/2023 04:42:06 WARNING No such comm: 2678191b17564118a9e16b1201d9b4d2
+05/07/2023 04:42:06 WARNING No such comm: 891bcbcf176840789f36c723e386c9b9
+05/07/2023 04:42:06 INFO Quantized model will be saved to: /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g
+05/07/2023 04:42:14 INFO Running quantization..
+05/07/2023 04:42:16 INFO Start quantizing layer 1/16
+05/07/2023 04:42:49 INFO Quantizing attention.query_key_value in layer 1/16...
+05/07/2023 04:42:50 INFO duration: 1.0365328788757324
+05/07/2023 04:42:50 INFO avg loss: 0.2228083991395018
+05/07/2023 04:43:23 INFO Quantizing attention.dense in layer 1/16...
+05/07/2023 04:43:24 INFO duration: 0.7084124088287354
+05/07/2023 04:43:24 INFO avg loss: 0.01904001936744958
+05/07/2023 04:43:57 INFO Quantizing mlp.dense_h_to_4h in layer 1/16...
+05/07/2023 04:43:58 INFO duration: 1.0652313232421875
+05/07/2023 04:43:58 INFO avg loss: 0.3040119207705
+05/07/2023 04:47:44 INFO Quantizing mlp.dense_4h_to_h in layer 1/16...
+05/07/2023 04:47:51 INFO duration: 6.762867212295532
+05/07/2023 04:47:51 INFO avg loss: 0.028748639221516405
+05/07/2023 04:48:12 INFO Start quantizing layer 2/16
+05/07/2023 04:48:45 INFO Quantizing attention.query_key_value in layer 2/16...
+05/07/2023 04:48:46 INFO duration: 0.9713742733001709
+05/07/2023 04:48:46 INFO avg loss: 0.35355199259310105
+05/07/2023 04:49:19 INFO Quantizing attention.dense in layer 2/16...
+05/07/2023 04:49:20 INFO duration: 0.7275807857513428
+05/07/2023 04:49:20 INFO avg loss: 0.06647738861961487
+05/07/2023 04:49:53 INFO Quantizing mlp.dense_h_to_4h in layer 2/16...
+05/07/2023 04:49:54 INFO duration: 1.083951711654663
+05/07/2023 04:49:54 INFO avg loss: 0.6772610437882721
+05/07/2023 04:53:40 INFO Quantizing mlp.dense_4h_to_h in layer 2/16...
+05/07/2023 04:53:47 INFO duration: 6.844736814498901
+05/07/2023 04:53:47 INFO avg loss: 0.05320497620473908
+05/07/2023 04:54:08 INFO Start quantizing layer 3/16
+05/07/2023 04:54:41 INFO Quantizing attention.query_key_value in layer 3/16...
+05/07/2023 04:54:42 INFO duration: 0.9685044288635254
+05/07/2023 04:54:42 INFO avg loss: 0.6015139448756989
+05/07/2023 04:55:15 INFO Quantizing attention.dense in layer 3/16...
+05/07/2023 04:55:16 INFO duration: 0.7167198657989502
+05/07/2023 04:55:16 INFO avg loss: 0.06039099241344058
+05/07/2023 04:55:49 INFO Quantizing mlp.dense_h_to_4h in layer 3/16...
+05/07/2023 04:55:50 INFO duration: 1.0765190124511719
+05/07/2023 04:55:50 INFO avg loss: 1.3903707193490416
+05/07/2023 04:59:37 INFO Quantizing mlp.dense_4h_to_h in layer 3/16...
+05/07/2023 04:59:43 INFO duration: 6.270395040512085
+05/07/2023 04:59:43 INFO avg loss: 0.181059166011465
+05/07/2023 05:00:04 INFO Start quantizing layer 4/16
+05/07/2023 05:00:37 INFO Quantizing attention.query_key_value in layer 4/16...
+05/07/2023 05:00:38 INFO duration: 0.9672496318817139
+05/07/2023 05:00:38 INFO avg loss: 0.9807066506090255
+05/07/2023 05:01:11 INFO Quantizing attention.dense in layer 4/16...
+05/07/2023 05:01:12 INFO duration: 0.7248861789703369
+05/07/2023 05:01:12 INFO avg loss: 0.1315788618418863
+05/07/2023 05:01:45 INFO Quantizing mlp.dense_h_to_4h in layer 4/16...
+05/07/2023 05:01:46 INFO duration: 1.083066463470459
+05/07/2023 05:01:46 INFO avg loss: 2.080002984807641
+05/07/2023 05:05:32 INFO Quantizing mlp.dense_4h_to_h in layer 4/16...
+05/07/2023 05:05:38 INFO duration: 6.18793797492981
+05/07/2023 05:05:38 INFO avg loss: 0.252437506240016
+05/07/2023 05:05:59 INFO Start quantizing layer 5/16
+05/07/2023 05:06:32 INFO Quantizing attention.query_key_value in layer 5/16...
+05/07/2023 05:06:33 INFO duration: 0.9693779945373535
+05/07/2023 05:06:33 INFO avg loss: 1.3782398682940629
+05/07/2023 05:07:06 INFO Quantizing attention.dense in layer 5/16...
+05/07/2023 05:07:07 INFO duration: 0.7210879325866699
+05/07/2023 05:07:07 INFO avg loss: 0.14899523392779884
+05/07/2023 05:07:40 INFO Quantizing mlp.dense_h_to_4h in layer 5/16...
+05/07/2023 05:07:41 INFO duration: 1.0800914764404297
+05/07/2023 05:07:41 INFO avg loss: 2.332041130025293
+05/07/2023 05:11:27 INFO Quantizing mlp.dense_4h_to_h in layer 5/16...
+05/07/2023 05:11:33 INFO duration: 6.191901206970215
+05/07/2023 05:11:33 INFO avg loss: 0.3255492384060503
+05/07/2023 05:11:54 INFO Start quantizing layer 6/16
+05/07/2023 05:12:27 INFO Quantizing attention.query_key_value in layer 6/16...
+05/07/2023 05:12:28 INFO duration: 0.9662725925445557
+05/07/2023 05:12:28 INFO avg loss: 1.757845780085197
+05/07/2023 05:13:01 INFO Quantizing attention.dense in layer 6/16...
+05/07/2023 05:13:02 INFO duration: 0.7185342311859131
+05/07/2023 05:13:02 INFO avg loss: 0.15947506450616514
+05/07/2023 05:13:35 INFO Quantizing mlp.dense_h_to_4h in layer 6/16...
+05/07/2023 05:13:36 INFO duration: 1.075429916381836
+05/07/2023 05:13:36 INFO avg loss: 2.4491654498635516
+05/07/2023 05:17:18 INFO Quantizing mlp.dense_4h_to_h in layer 6/16...
+05/07/2023 05:17:24 INFO duration: 5.919256925582886
+05/07/2023 05:17:24 INFO avg loss: 0.40534172017480363
+05/07/2023 05:17:45 INFO Start quantizing layer 7/16
+05/07/2023 05:18:18 INFO Quantizing attention.query_key_value in layer 7/16...
+05/07/2023 05:18:19 INFO duration: 0.9676733016967773
+05/07/2023 05:18:19 INFO avg loss: 2.131913417698349
+05/07/2023 05:18:52 INFO Quantizing attention.dense in layer 7/16...
+05/07/2023 05:18:53 INFO duration: 0.7196581363677979
+05/07/2023 05:18:53 INFO avg loss: 0.20212076367915502
+05/07/2023 05:19:26 INFO Quantizing mlp.dense_h_to_4h in layer 7/16...
+05/07/2023 05:19:27 INFO duration: 1.0817346572875977
+05/07/2023 05:19:27 INFO avg loss: 2.4321377462726304
+05/07/2023 05:23:08 INFO Quantizing mlp.dense_4h_to_h in layer 7/16...
+05/07/2023 05:23:14 INFO duration: 5.973307132720947
+05/07/2023 05:23:14 INFO avg loss: 0.4796293378511049
+05/07/2023 05:23:35 INFO Start quantizing layer 8/16
+05/07/2023 05:24:08 INFO Quantizing attention.query_key_value in layer 8/16...
+05/07/2023 05:24:09 INFO duration: 0.9668700695037842
+05/07/2023 05:24:09 INFO avg loss: 2.3333008332501333
+05/07/2023 05:24:42 INFO Quantizing attention.dense in layer 8/16...
+05/07/2023 05:24:43 INFO duration: 0.7205338478088379
+05/07/2023 05:24:43 INFO avg loss: 0.2906766491322218
+05/07/2023 05:25:16 INFO Quantizing mlp.dense_h_to_4h in layer 8/16...
+05/07/2023 05:25:17 INFO duration: 1.075392246246338
+05/07/2023 05:25:17 INFO avg loss: 2.088160245690229
+05/07/2023 05:28:59 INFO Quantizing mlp.dense_4h_to_h in layer 8/16...
+05/07/2023 05:29:05 INFO duration: 6.0966198444366455
+05/07/2023 05:29:05 INFO avg loss: 0.4126856014751398
+05/07/2023 05:29:26 INFO Start quantizing layer 9/16
+05/07/2023 05:29:59 INFO Quantizing attention.query_key_value in layer 9/16...
+05/07/2023 05:30:00 INFO duration: 0.971062183380127
+05/07/2023 05:30:00 INFO avg loss: 4.631909777689031
+05/07/2023 05:30:33 INFO Quantizing attention.dense in layer 9/16...
+05/07/2023 05:30:34 INFO duration: 0.7198226451873779
+05/07/2023 05:30:34 INFO avg loss: 0.2723473172091321
+05/07/2023 05:31:07 INFO Quantizing mlp.dense_h_to_4h in layer 9/16...
+05/07/2023 05:31:08 INFO duration: 1.0791394710540771
+05/07/2023 05:31:08 INFO avg loss: 2.0461749482078675
+05/07/2023 05:34:49 INFO Quantizing mlp.dense_4h_to_h in layer 9/16...
+05/07/2023 05:34:55 INFO duration: 5.983144044876099
+05/07/2023 05:34:55 INFO avg loss: 0.5113805541342186
+05/07/2023 05:35:16 INFO Start quantizing layer 10/16
+05/07/2023 05:35:49 INFO Quantizing attention.query_key_value in layer 10/16...
+05/07/2023 05:35:50 INFO duration: 0.9664998054504395
+05/07/2023 05:35:50 INFO avg loss: 7.197037864416933
+05/07/2023 05:36:23 INFO Quantizing attention.dense in layer 10/16...
+05/07/2023 05:36:24 INFO duration: 0.7181813716888428
+05/07/2023 05:36:24 INFO avg loss: 0.3427228673705405
+05/07/2023 05:36:57 INFO Quantizing mlp.dense_h_to_4h in layer 10/16...
+05/07/2023 05:36:58 INFO duration: 1.0781819820404053
+05/07/2023 05:36:58 INFO avg loss: 2.320328880041933
+05/07/2023 05:40:40 INFO Quantizing mlp.dense_4h_to_h in layer 10/16...
+05/07/2023 05:40:46 INFO duration: 6.027331829071045
+05/07/2023 05:40:46 INFO avg loss: 0.6135274056301584
+05/07/2023 05:41:07 INFO Start quantizing layer 11/16
+05/07/2023 05:41:40 INFO Quantizing attention.query_key_value in layer 11/16...
+05/07/2023 05:41:41 INFO duration: 0.9669804573059082
+05/07/2023 05:41:41 INFO avg loss: 7.502283845846645
+05/07/2023 05:42:14 INFO Quantizing attention.dense in layer 11/16...
+05/07/2023 05:42:14 INFO duration: 0.7167062759399414
+05/07/2023 05:42:14 INFO avg loss: 0.2933824760591387
+05/07/2023 05:42:47 INFO Quantizing mlp.dense_h_to_4h in layer 11/16...
+05/07/2023 05:42:48 INFO duration: 1.077958345413208
+05/07/2023 05:42:48 INFO avg loss: 2.6354988268769968
+05/07/2023 05:46:30 INFO Quantizing mlp.dense_4h_to_h in layer 11/16...
+05/07/2023 05:46:36 INFO duration: 5.968295335769653
+05/07/2023 05:46:36 INFO avg loss: 0.7737983809238551
+05/07/2023 05:46:57 INFO Start quantizing layer 12/16
+05/07/2023 05:47:30 INFO Quantizing attention.query_key_value in layer 12/16...
+05/07/2023 05:47:31 INFO duration: 0.9708924293518066
+05/07/2023 05:47:31 INFO avg loss: 6.875169520433972
+05/07/2023 05:48:04 INFO Quantizing attention.dense in layer 12/16...
+05/07/2023 05:48:05 INFO duration: 0.7233545780181885
+05/07/2023 05:48:05 INFO avg loss: 0.36776245897189497
+05/07/2023 05:48:38 INFO Quantizing mlp.dense_h_to_4h in layer 12/16...
+05/07/2023 05:48:39 INFO duration: 1.078718900680542
+05/07/2023 05:48:39 INFO avg loss: 2.9615547415801386
+05/07/2023 05:52:21 INFO Quantizing mlp.dense_4h_to_h in layer 12/16...
+05/07/2023 05:52:27 INFO duration: 6.078177452087402
+05/07/2023 05:52:27 INFO avg loss: 0.9158687896241015
+05/07/2023 05:52:48 INFO Start quantizing layer 13/16
+05/07/2023 05:53:21 INFO Quantizing attention.query_key_value in layer 13/16...
+05/07/2023 05:53:22 INFO duration: 0.9698812961578369
+05/07/2023 05:53:22 INFO avg loss: 5.93688639842918
+05/07/2023 05:53:54 INFO Quantizing attention.dense in layer 13/16...
+05/07/2023 05:53:55 INFO duration: 0.7205860614776611
+05/07/2023 05:53:55 INFO avg loss: 0.24467934637912672
+05/07/2023 05:54:28 INFO Quantizing mlp.dense_h_to_4h in layer 13/16...
+05/07/2023 05:54:29 INFO duration: 1.0801022052764893
+05/07/2023 05:54:29 INFO avg loss: 3.275802466054313
+05/07/2023 05:58:11 INFO Quantizing mlp.dense_4h_to_h in layer 13/16...
+05/07/2023 05:58:17 INFO duration: 6.09338641166687
+05/07/2023 05:58:17 INFO avg loss: 1.0767965265991082
+05/07/2023 05:58:38 INFO Start quantizing layer 14/16
+05/07/2023 05:59:11 INFO Quantizing attention.query_key_value in layer 14/16...
+05/07/2023 05:59:12 INFO duration: 0.9676227569580078
+05/07/2023 05:59:12 INFO avg loss: 6.686944638578275
+05/07/2023 05:59:45 INFO Quantizing attention.dense in layer 14/16...
+05/07/2023 05:59:46 INFO duration: 0.7196416854858398
+05/07/2023 05:59:46 INFO avg loss: 0.34242789661541534
+05/07/2023 06:00:19 INFO Quantizing mlp.dense_h_to_4h in layer 14/16...
+05/07/2023 06:00:20 INFO duration: 1.0829389095306396
+05/07/2023 06:00:20 INFO avg loss: 3.705307965588392
+05/07/2023 06:04:02 INFO Quantizing mlp.dense_4h_to_h in layer 14/16...
+05/07/2023 06:04:08 INFO duration: 6.013010263442993
+05/07/2023 06:04:08 INFO avg loss: 1.1975950458433173
+05/07/2023 06:04:29 INFO Start quantizing layer 15/16
+05/07/2023 06:05:02 INFO Quantizing attention.query_key_value in layer 15/16...
+05/07/2023 06:05:03 INFO duration: 0.9704198837280273
+05/07/2023 06:05:03 INFO avg loss: 7.567932973908413
+05/07/2023 06:05:36 INFO Quantizing attention.dense in layer 15/16...
+05/07/2023 06:05:37 INFO duration: 0.7222294807434082
+05/07/2023 06:05:37 INFO avg loss: 0.4468821890184039
+05/07/2023 06:06:10 INFO Quantizing mlp.dense_h_to_4h in layer 15/16...
+05/07/2023 06:06:11 INFO duration: 1.0775363445281982
+05/07/2023 06:06:11 INFO avg loss: 4.276716368393903
+05/07/2023 06:09:52 INFO Quantizing mlp.dense_4h_to_h in layer 15/16...
+05/07/2023 06:09:58 INFO duration: 6.097189664840698
+05/07/2023 06:09:58 INFO avg loss: 1.6799194205937167
+05/07/2023 06:10:19 INFO Start quantizing layer 16/16
+05/07/2023 06:10:52 INFO Quantizing attention.query_key_value in layer 16/16...
+05/07/2023 06:10:53 INFO duration: 0.9705617427825928
+05/07/2023 06:10:53 INFO avg loss: 7.100380016972843
+05/07/2023 06:11:26 INFO Quantizing attention.dense in layer 16/16...
+05/07/2023 06:11:27 INFO duration: 0.722510814666748
+05/07/2023 06:11:27 INFO avg loss: 0.24434113426330373
+05/07/2023 06:12:00 INFO Quantizing mlp.dense_h_to_4h in layer 16/16...
+05/07/2023 06:12:01 INFO duration: 1.0826246738433838
+05/07/2023 06:12:01 INFO avg loss: 4.788446298422524
+05/07/2023 06:15:43 INFO Quantizing mlp.dense_4h_to_h in layer 16/16...
+05/07/2023 06:15:49 INFO duration: 6.170569658279419
+05/07/2023 06:15:49 INFO avg loss: 1.7897084716536875
+05/07/2023 06:16:11 INFO Packing model...
+05/07/2023 06:16:11 INFO gpt_neox.layers.0.attention.dense
+05/07/2023 06:16:12 INFO gpt_neox.layers.0.attention.query_key_value
+05/07/2023 06:16:15 INFO gpt_neox.layers.0.mlp.dense_4h_to_h
+05/07/2023 06:16:18 INFO gpt_neox.layers.0.mlp.dense_h_to_4h
+05/07/2023 06:16:22 INFO gpt_neox.layers.1.attention.dense
+05/07/2023 06:16:23 INFO gpt_neox.layers.1.attention.query_key_value
+05/07/2023 06:16:26 INFO gpt_neox.layers.1.mlp.dense_4h_to_h
+05/07/2023 06:16:29 INFO gpt_neox.layers.1.mlp.dense_h_to_4h
+05/07/2023 06:16:33 INFO gpt_neox.layers.2.attention.dense
+05/07/2023 06:16:34 INFO gpt_neox.layers.2.attention.query_key_value
+05/07/2023 06:16:37 INFO gpt_neox.layers.2.mlp.dense_4h_to_h
+05/07/2023 06:16:40 INFO gpt_neox.layers.2.mlp.dense_h_to_4h
+05/07/2023 06:16:44 INFO gpt_neox.layers.3.attention.dense
+05/07/2023 06:16:45 INFO gpt_neox.layers.3.attention.query_key_value
+05/07/2023 06:16:48 INFO gpt_neox.layers.3.mlp.dense_4h_to_h
+05/07/2023 06:16:51 INFO gpt_neox.layers.3.mlp.dense_h_to_4h
+05/07/2023 06:16:56 INFO gpt_neox.layers.4.attention.dense
+05/07/2023 06:16:56 INFO gpt_neox.layers.4.attention.query_key_value
+05/07/2023 06:16:59 INFO gpt_neox.layers.4.mlp.dense_4h_to_h
+05/07/2023 06:17:03 INFO gpt_neox.layers.4.mlp.dense_h_to_4h
+05/07/2023 06:17:07 INFO gpt_neox.layers.5.attention.dense
+05/07/2023 06:17:08 INFO gpt_neox.layers.5.attention.query_key_value
+05/07/2023 06:17:11 INFO gpt_neox.layers.5.mlp.dense_4h_to_h
+05/07/2023 06:17:14 INFO gpt_neox.layers.5.mlp.dense_h_to_4h
+05/07/2023 06:17:18 INFO gpt_neox.layers.6.attention.dense
+05/07/2023 06:17:19 INFO gpt_neox.layers.6.attention.query_key_value
+05/07/2023 06:17:22 INFO gpt_neox.layers.6.mlp.dense_4h_to_h
+05/07/2023 06:17:25 INFO gpt_neox.layers.6.mlp.dense_h_to_4h
+05/07/2023 06:17:29 INFO gpt_neox.layers.7.attention.dense
+05/07/2023 06:17:30 INFO gpt_neox.layers.7.attention.query_key_value
+05/07/2023 06:17:33 INFO gpt_neox.layers.7.mlp.dense_4h_to_h
+05/07/2023 06:17:36 INFO gpt_neox.layers.7.mlp.dense_h_to_4h
+05/07/2023 06:17:40 INFO gpt_neox.layers.8.attention.dense
+05/07/2023 06:17:41 INFO gpt_neox.layers.8.attention.query_key_value
+05/07/2023 06:17:44 INFO gpt_neox.layers.8.mlp.dense_4h_to_h
+05/07/2023 06:17:47 INFO gpt_neox.layers.8.mlp.dense_h_to_4h
+05/07/2023 06:17:51 INFO gpt_neox.layers.9.attention.dense
+05/07/2023 06:17:52 INFO gpt_neox.layers.9.attention.query_key_value
+05/07/2023 06:17:55 INFO gpt_neox.layers.9.mlp.dense_4h_to_h
+05/07/2023 06:17:58 INFO gpt_neox.layers.9.mlp.dense_h_to_4h
+05/07/2023 06:18:02 INFO gpt_neox.layers.10.attention.dense
+05/07/2023 06:18:03 INFO gpt_neox.layers.10.attention.query_key_value
+05/07/2023 06:18:06 INFO gpt_neox.layers.10.mlp.dense_4h_to_h
+05/07/2023 06:18:09 INFO gpt_neox.layers.10.mlp.dense_h_to_4h
+05/07/2023 06:18:13 INFO gpt_neox.layers.11.attention.dense
+05/07/2023 06:18:14 INFO gpt_neox.layers.11.attention.query_key_value
+05/07/2023 06:18:17 INFO gpt_neox.layers.11.mlp.dense_4h_to_h
+05/07/2023 06:18:20 INFO gpt_neox.layers.11.mlp.dense_h_to_4h
+05/07/2023 06:18:24 INFO gpt_neox.layers.12.attention.dense
+05/07/2023 06:18:25 INFO gpt_neox.layers.12.attention.query_key_value
+05/07/2023 06:18:28 INFO gpt_neox.layers.12.mlp.dense_4h_to_h
+05/07/2023 06:18:31 INFO gpt_neox.layers.12.mlp.dense_h_to_4h
+05/07/2023 06:18:35 INFO gpt_neox.layers.13.attention.dense
+05/07/2023 06:18:36 INFO gpt_neox.layers.13.attention.query_key_value
+05/07/2023 06:18:39 INFO gpt_neox.layers.13.mlp.dense_4h_to_h
+05/07/2023 06:18:42 INFO gpt_neox.layers.13.mlp.dense_h_to_4h
+05/07/2023 06:18:46 INFO gpt_neox.layers.14.attention.dense
+05/07/2023 06:18:47 INFO gpt_neox.layers.14.attention.query_key_value
+05/07/2023 06:18:50 INFO gpt_neox.layers.14.mlp.dense_4h_to_h
+05/07/2023 06:18:53 INFO gpt_neox.layers.14.mlp.dense_h_to_4h
+05/07/2023 06:18:57 INFO gpt_neox.layers.15.attention.dense
+05/07/2023 06:18:58 INFO gpt_neox.layers.15.attention.query_key_value
+05/07/2023 06:19:01 INFO gpt_neox.layers.15.mlp.dense_4h_to_h
+05/07/2023 06:19:04 INFO gpt_neox.layers.15.mlp.dense_h_to_4h
+05/07/2023 06:19:08 INFO Model packed.
+05/07/2023 06:19:08 WARNING using autotune_warmup will move model to GPU, make sure you have enough VRAM to load the whole model.
+05/07/2023 06:19:09 INFO Found 4 unique KN Linear values.
+05/07/2023 06:19:09 INFO Warming up autotune cache ...
+05/07/2023 06:19:58 INFO Done! Saving..
+05/07/2023 06:20:05 INFO Saved. Size of the model file(s): 10063.64 MB
+05/07/2023 06:20:05 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+05/07/2023 06:20:05 INFO embed_out not been quantized, will be ignored when make_quant.
+05/07/2023 06:20:06 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+05/07/2023 06:20:06 INFO Found 4 unique KN Linear values.
+05/07/2023 06:20:06 INFO Warming up autotune cache ...
+05/07/2023 06:20:07 INFO Sample output: ('Because woodchucks (or squirrels, as they\'re also known) are "the chink[e] '
+'of wood."')
+05/07/2023 06:20:07 INFO GPU memory usage during test inference: 4.61 GB
+05/07/2023 06:20:08 WARNING No such comm: d349e6339e5442e4a3286af931f0699f
+05/07/2023 06:20:08 WARNING No such comm: 9374387013794a8bab6ba19cace86d58
+05/07/2023 06:20:08 WARNING No such comm: bf152b67bcc04b93863ac311ea4df76a
+05/07/2023 06:20:08 WARNING No such comm: 118ccfc8fe874373ae03f8132fb8c258
+05/07/2023 06:20:08 WARNING No such comm: 9d85d31e378c44ce9119ead8b83e7556
+05/07/2023 06:20:08 WARNING No such comm: c8c5130cae894895a66be12fe834c673
+05/07/2023 06:20:08 WARNING No such comm: 237ea212dbd74befad2f34ba2161307d
+05/07/2023 06:20:08 WARNING No such comm: 86eda75ae855461b8f5c1ae5b3a83cec
+05/07/2023 06:20:08 WARNING No such comm: 63731c4e51f0433fbf85712c08c3d4bf
+05/07/2023 06:20:08 WARNING No such comm: 2079a099466341488fc017f30e9359a8
+05/07/2023 06:20:08 WARNING No such comm: 99fa75439d3d47c0a6a5b7c25c526718
+05/07/2023 06:20:09 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+05/07/2023 06:20:09 INFO embed_out not been quantized, will be ignored when make_quant.
+05/07/2023 06:20:09 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+05/07/2023 06:20:10 INFO Found 4 unique KN Linear values.
+05/07/2023 06:20:10 INFO Warming up autotune cache ...
+05/07/2023 06:31:04 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+05/07/2023 06:31:04 INFO embed_out not been quantized, will be ignored when make_quant.
+05/07/2023 06:31:04 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+05/07/2023 06:31:05 INFO Found 4 unique KN Linear values.
+05/07/2023 06:31:05 INFO Warming up autotune cache ...
+05/07/2023 06:31:46 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+05/07/2023 06:31:46 INFO embed_out not been quantized, will be ignored when make_quant.
+05/07/2023 06:31:46 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+05/07/2023 06:31:46 INFO Found 4 unique KN Linear values.
+05/07/2023 06:31:46 INFO Warming up autotune cache ...
+05/07/2023 06:32:16 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+05/07/2023 06:32:16 INFO embed_out not been quantized, will be ignored when make_quant.
+05/07/2023 06:32:16 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+05/07/2023 06:32:16 INFO Found 4 unique KN Linear values.
+05/07/2023 06:32:16 INFO Warming up autotune cache ...
+05/07/2023 06:32:42 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+05/07/2023 06:32:42 INFO embed_out not been quantized, will be ignored when make_quant.
+05/07/2023 06:32:42 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+05/07/2023 06:32:42 INFO Found 4 unique KN Linear values.
+05/07/2023 06:32:42 INFO Warming up autotune cache ...
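The log records a layer-by-layer GPTQ pass over the 16 GPT-NeoX blocks, with OpenAssistant/oasst1 as the calibration set. Below is a minimal sketch of how such a run is typically driven with AutoGPTQ; the calibration sample count, text field, and example selection are illustrative assumptions, not taken from this commit.

```python
# Hypothetical sketch of the quantization run above, assuming AutoGPTQ.
# Calibration details (sample count, field, selection) are assumptions.
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from datasets import load_dataset
from transformers import AutoTokenizer

base = "openassistant/stablelm-7b-sft-v7-epoch-3"
tokenizer = AutoTokenizer.from_pretrained(base)

# Calibration examples drawn from oasst1 (the dataset the log shows being loaded).
rows = load_dataset("OpenAssistant/oasst1", split="train").select(range(128))
examples = [tokenizer(row["text"], return_tensors="pt") for row in rows]

# Settings mirror quantize_config.json in this commit.
quantize_config = BaseQuantizeConfig(
    bits=4,
    group_size=128,
    damp_percent=0.01,
    desc_act=True,
    sym=True,
    true_sequential=True,
)

model = AutoGPTQForCausalLM.from_pretrained(base, quantize_config)
model.quantize(examples)  # emits the per-layer "duration" / "avg loss" lines above
model.save_quantized("stablelm-7b-sft-v7-epoch-3-4bit-128g", use_safetensors=True)
```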
config.json
ADDED
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "openassistant/stablelm-7b-sft-v7-epoch-3",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "bos_token_id": 0,
+  "eos_token_id": 0,
+  "hidden_act": "gelu",
+  "hidden_size": 6144,
+  "initializer_range": 0.02,
+  "intermediate_size": 24576,
+  "layer_norm_eps": 1e-05,
+  "max_length": 4096,
+  "max_position_embeddings": 4096,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 48,
+  "num_hidden_layers": 16,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.28.1",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50288
+}
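For scale: config.json describes a 16-layer GPT-NeoX model with hidden size 6144, intermediate size 24576, and untied output embeddings. A back-of-the-envelope count (ignoring biases and layer norms) lands near the 7B in the model's name:

```python
# Rough parameter count from config.json; biases and layer norms are ignored,
# so this is an estimate, not a measured value.
h, n_layers, vocab, intermediate = 6144, 16, 50288, 24576
per_layer = 4 * h * h + 2 * h * intermediate  # qkv+dense projections, two MLP matmuls
embeddings = 2 * vocab * h                    # embed_in plus untied embed_out
total = n_layers * per_layer + embeddings
print(f"~{total / 1e9:.2f}B parameters")      # ~7.87B
```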
generation_config.json
ADDED
@@ -0,0 +1,11 @@
+{
+  "do_sample": true,
+  "eos_token_id": 0,
+  "from_model_config": false,
+  "max_new_tokens": 128,
+  "pad_token_id": 0,
+  "penalty_alpha": 0.6,
+  "temperature": 0.9,
+  "top_k": 4,
+  "transformers_version": "4.28.1"
+}
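These defaults combine sampling (temperature 0.9, top_k 4) with penalty_alpha; note that in transformers 4.28 contrastive search appears to engage only when do_sample is false, so with do_sample set these values should behave as plain temperature/top-k sampling. A minimal sketch of recreating and saving the same config (the target directory is an illustrative assumption):

```python
# Recreating the decoding defaults above with transformers' GenerationConfig.
# The target directory is an illustrative assumption.
from transformers import GenerationConfig

gen_cfg = GenerationConfig(
    do_sample=True,
    max_new_tokens=128,
    temperature=0.9,
    top_k=4,            # sample from the 4 most likely tokens
    penalty_alpha=0.6,  # contrastive-search penalty (likely inert while do_sample=True)
    eos_token_id=0,
    pad_token_id=0,
)
gen_cfg.save_pretrained("stablelm-7b-sft-v7-epoch-3-4bit-128g")  # writes generation_config.json
```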
gptq_model-4bit-128g.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5904ac19352a155dd3947ce06d4c282e7c673a071eeab48fba35533fc9b467d8
+size 5275239687
gptq_model-4bit-128g.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9c9abe092b22d056211ae80889389f31986ae264df551c0dac3b27d28e50657
+size 5275134944
quantize_config.json
ADDED
@@ -0,0 +1,8 @@
+{
+  "bits": 4,
+  "group_size": 128,
+  "damp_percent": 0.01,
+  "desc_act": true,
+  "sym": true,
+  "true_sequential": true
+}
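quantize_config.json is what AutoGPTQ reads back at load time: 4-bit weights in groups of 128 with activation-order (desc_act) quantization. A minimal loading sketch under that assumption follows; the local directory name and prompt are illustrative, and use_triton mirrors the warnings in the log (only enable it if the Triton kernels are installed).

```python
# Minimal sketch for loading the committed artifacts, assuming AutoGPTQ.
# Directory name and prompt are illustrative assumptions.
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer

model_dir = "stablelm-7b-sft-v7-epoch-3-4bit-128g"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoGPTQForCausalLM.from_quantized(
    model_dir,
    model_basename="gptq_model-4bit-128g",
    use_safetensors=True,  # picks the .safetensors file over the .bin
    use_triton=True,       # source of the "use_triton" warnings in the log
    device="cuda:0",
)

prompt = "<|prompter|>Why do woodchucks chuck wood?<|endoftext|><|assistant|>"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda:0")
output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```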
special_tokens_map.json
ADDED
@@ -0,0 +1,14 @@
+{
+  "additional_special_tokens": [
+    "<|prefix_end|>",
+    "<|prefix_begin|>",
+    "<|assistant|>",
+    "<|prompter|>",
+    "<|system|>"
+  ],
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|padding|>",
+  "sep_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
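The additional special tokens are the OpenAssistant chat markers: turns are framed as `<|prompter|>`…`<|endoftext|>` and `<|assistant|>`…`<|endoftext|>` segments. A small illustrative helper (the function is ours, not part of the repo):

```python
# Illustrative builder for the OASST prompt format implied by the tokens above.
# The helper name and structure are our own, not from this repository.
EOS = "<|endoftext|>"

def build_prompt(turns):
    """turns: list of (role, text) pairs, role in {"system", "prompter", "assistant"}."""
    prompt = "".join(f"<|{role}|>{text}{EOS}" for role, text in turns)
    return prompt + "<|assistant|>"  # trailing tag cues the model to answer

print(build_prompt([("prompter", "How much wood could a woodchuck chuck?")]))
# <|prompter|>How much wood could a woodchuck chuck?<|endoftext|><|assistant|>
```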
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,9 @@
+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 4096,
+  "tokenizer_class": "GPTNeoXTokenizer",
+  "unk_token": "<|endoftext|>"
+}