pszemraj committed
Commit 1e2d4fe
1 Parent(s): 0b5aae0

add quantized version
.gitattributes CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+ gptq_model-4bit-128g.bin filter=lfs diff=lfs merge=lfs -text
+ gptq_model-4bit-128g.safetensors filter=lfs diff=lfs merge=lfs -text
LOG_quant.log ADDED
@@ -0,0 +1,336 @@
+ 05/07/2023 04:42:05 WARNING Found cached dataset parquet (/home/pszemraj/.cache/huggingface/datasets/OpenAssistant___parquet/OpenAssistant--oasst1-2960c57d7e52ab15/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+ 05/07/2023 04:42:06 WARNING No such comm: c8c073cce7994da5b454ed0300090049
+ 05/07/2023 04:42:06 WARNING No such comm: 1103c6a0950249ca863ebc8399fddfef
+ 05/07/2023 04:42:06 WARNING No such comm: 5c3ce017525f4406904695297ace8724
+ 05/07/2023 04:42:06 WARNING No such comm: c5ceaf44ed3942cdb730705e230f024b
+ 05/07/2023 04:42:06 WARNING No such comm: f953c7265b2248c98cc4dbe971b44f3d
+ 05/07/2023 04:42:06 WARNING No such comm: 687a131767524803a41093a1d84f4652
+ 05/07/2023 04:42:06 WARNING No such comm: 93293aa5cce946bc8c6aa6ee4d0eaeb1
+ 05/07/2023 04:42:06 WARNING No such comm: 637d46ef1d57406a817ef020d0c7bf06
+ 05/07/2023 04:42:06 WARNING No such comm: 494913a72a3b4802b2390b58f38a3a36
+ 05/07/2023 04:42:06 WARNING No such comm: 2678191b17564118a9e16b1201d9b4d2
+ 05/07/2023 04:42:06 WARNING No such comm: 891bcbcf176840789f36c723e386c9b9
+ 05/07/2023 04:42:06 INFO Quantized model will be saved to: /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g
+ 05/07/2023 04:42:14 INFO Running quantization..
+ 05/07/2023 04:42:16 INFO Start quantizing layer 1/16
+ 05/07/2023 04:42:49 INFO Quantizing attention.query_key_value in layer 1/16...
+ 05/07/2023 04:42:50 INFO duration: 1.0365328788757324
+ 05/07/2023 04:42:50 INFO avg loss: 0.2228083991395018
+ 05/07/2023 04:43:23 INFO Quantizing attention.dense in layer 1/16...
+ 05/07/2023 04:43:24 INFO duration: 0.7084124088287354
+ 05/07/2023 04:43:24 INFO avg loss: 0.01904001936744958
+ 05/07/2023 04:43:57 INFO Quantizing mlp.dense_h_to_4h in layer 1/16...
+ 05/07/2023 04:43:58 INFO duration: 1.0652313232421875
+ 05/07/2023 04:43:58 INFO avg loss: 0.304011920770505/07/2023 04:47:44 INFO Quantizing mlp.dense_4h_to_h in layer 1/16...
+ 05/07/2023 04:47:51 INFO duration: 6.762867212295532
+ 05/07/2023 04:47:51 INFO avg loss: 0.028748639221516405
+ 05/07/2023 04:48:12 INFO Start quantizing layer 2/16
+ 05/07/2023 04:48:45 INFO Quantizing attention.query_key_value in layer 2/16...
+ 05/07/2023 04:48:46 INFO duration: 0.9713742733001709
+ 05/07/2023 04:48:46 INFO avg loss: 0.35355199259310105
+ 05/07/2023 04:49:19 INFO Quantizing attention.dense in layer 2/16...
+ 05/07/2023 04:49:20 INFO duration: 0.7275807857513428
+ 05/07/2023 04:49:20 INFO avg loss: 0.06647738861961487
+ 05/07/2023 04:49:53 INFO Quantizing mlp.dense_h_to_4h in layer 2/16...
+ 05/07/2023 04:49:54 INFO duration: 1.083951711654663
+ 05/07/2023 04:49:54 INFO avg loss: 0.6772610437882721
+ 05/07/2023 04:53:40 INFO Quantizing mlp.dense_4h_to_h in layer 2/16...
+ 05/07/2023 04:53:47 INFO duration: 6.844736814498901
+ 05/07/2023 04:53:47 INFO avg loss: 0.05320497620473908
+ 05/07/2023 04:54:08 INFO Start quantizing layer 3/16
+ 05/07/2023 04:54:41 INFO Quantizing attention.query_key_value in layer 3/16...
+ 05/07/2023 04:54:42 INFO duration: 0.9685044288635254
+ 05/07/2023 04:54:42 INFO avg loss: 0.6015139448756989
+ 05/07/2023 04:55:15 INFO Quantizing attention.dense in layer 3/16...
+ 05/07/2023 04:55:16 INFO duration: 0.7167198657989502
+ 05/07/2023 04:55:16 INFO avg loss: 0.06039099241344058
+ 05/07/2023 04:55:49 INFO Quantizing mlp.dense_h_to_4h in layer 3/16...
+ 05/07/2023 04:55:50 INFO duration: 1.0765190124511719
+ 05/07/2023 04:55:50 INFO avg loss: 1.3903707193490416
+ 05/07/2023 04:59:37 INFO Quantizing mlp.dense_4h_to_h in layer 3/16...
+ 05/07/2023 04:59:43 INFO duration: 6.270395040512085
+ 05/07/2023 04:59:43 INFO avg loss: 0.181059166011465
+ 05/07/2023 05:00:04 INFO Start quantizing layer 4/16
+ 05/07/2023 05:00:37 INFO Quantizing attention.query_key_value in layer 4/16...
+ 05/07/2023 05:00:38 INFO duration: 0.9672496318817139
+ 05/07/2023 05:00:38 INFO avg loss: 0.9807066506090255
+ 05/07/2023 05:01:11 INFO Quantizing attention.dense in layer 4/16...
+ 05/07/2023 05:01:12 INFO duration: 0.7248861789703369
+ 05/07/2023 05:01:12 INFO avg loss: 0.1315788618418863
+ 05/07/2023 05:01:45 INFO Quantizing mlp.dense_h_to_4h in layer 4/16...
+ 05/07/2023 05:01:46 INFO duration: 1.083066463470459
+ 05/07/2023 05:01:46 INFO avg loss: 2.080002984807641
+ 05/07/2023 05:05:32 INFO Quantizing mlp.dense_4h_to_h in layer 4/16...
+ 05/07/2023 05:05:38 INFO duration: 6.18793797492981
+ 05/07/2023 05:05:38 INFO avg loss: 0.252437506240016
+ 05/07/2023 05:05:59 INFO Start quantizing layer 5/16
+ 05/07/2023 05:06:32 INFO Quantizing attention.query_key_value in layer 5/16...
+ 05/07/2023 05:06:33 INFO duration: 0.9693779945373535
+ 05/07/2023 05:06:33 INFO avg loss: 1.3782398682940629
+ 05/07/2023 05:07:06 INFO Quantizing attention.dense in layer 5/16...
+ 05/07/2023 05:07:07 INFO duration: 0.7210879325866699
+ 05/07/2023 05:07:07 INFO avg loss: 0.14899523392779884
+ 05/07/2023 05:07:40 INFO Quantizing mlp.dense_h_to_4h in layer 5/16...
+ 05/07/2023 05:07:41 INFO duration: 1.0800914764404297
+ 05/07/2023 05:07:41 INFO avg loss: 2.332041130025293
+ 05/07/2023 05:11:27 INFO Quantizing mlp.dense_4h_to_h in layer 5/16...
+ 05/07/2023 05:11:33 INFO duration: 6.191901206970215
+ 05/07/2023 05:11:33 INFO avg loss: 0.3255492384060503
+ 05/07/2023 05:11:54 INFO Start quantizing layer 6/16
+ 05/07/2023 05:12:27 INFO Quantizing attention.query_key_value in layer 6/16...
+ 05/07/2023 05:12:28 INFO duration: 0.9662725925445557
+ 05/07/2023 05:12:28 INFO avg loss: 1.757845780085197
+ 05/07/2023 05:13:01 INFO Quantizing attention.dense in layer 6/16...
+ 05/07/2023 05:13:02 INFO duration: 0.7185342311859131
+ 05/07/2023 05:13:02 INFO avg loss: 0.15947506450616514
+ 05/07/2023 05:13:35 INFO Quantizing mlp.dense_h_to_4h in layer 6/16...
+ 05/07/2023 05:13:36 INFO duration: 1.075429916381836
+ 05/07/2023 05:13:36 INFO avg loss: 2.4491654498635516
+ 05/07/2023 05:17:18 INFO Quantizing mlp.dense_4h_to_h in layer 6/16...
+ 05/07/2023 05:17:24 INFO duration: 5.919256925582886
+ 05/07/2023 05:17:24 INFO avg loss: 0.40534172017480363
+ 05/07/2023 05:17:45 INFO Start quantizing layer 7/16
+ 05/07/2023 05:18:18 INFO Quantizing attention.query_key_value in layer 7/16...
+ 05/07/2023 05:18:19 INFO duration: 0.9676733016967773
+ 05/07/2023 05:18:19 INFO avg loss: 2.131913417698349
+ 05/07/2023 05:18:52 INFO Quantizing attention.dense in layer 7/16...
+ 05/07/2023 05:18:53 INFO duration: 0.7196581363677979
+ 05/07/2023 05:18:53 INFO avg loss: 0.20212076367915502
+ 05/07/2023 05:19:26 INFO Quantizing mlp.dense_h_to_4h in layer 7/16...
+ 05/07/2023 05:19:27 INFO duration: 1.0817346572875977
+ 05/07/2023 05:19:27 INFO avg loss: 2.4321377462726304
+ 05/07/2023 05:23:08 INFO Quantizing mlp.dense_4h_to_h in layer 7/16...
+ 05/07/2023 05:23:14 INFO duration: 5.973307132720947
+ 05/07/2023 05:23:14 INFO avg loss: 0.4796293378511049
+ 05/07/2023 05:23:35 INFO Start quantizing layer 8/16
+ 05/07/2023 05:24:08 INFO Quantizing attention.query_key_value in layer 8/16...
+ 05/07/2023 05:24:09 INFO duration: 0.9668700695037842
+ 05/07/2023 05:24:09 INFO avg loss: 2.3333008332501333
+ 05/07/2023 05:24:42 INFO Quantizing attention.dense in layer 8/16...
+ 05/07/2023 05:24:43 INFO duration: 0.7205338478088379
+ 05/07/2023 05:24:43 INFO avg loss: 0.2906766491322218
+ 05/07/2023 05:25:16 INFO Quantizing mlp.dense_h_to_4h in layer 8/16...
+ 05/07/2023 05:25:17 INFO duration: 1.075392246246338
+ 05/07/2023 05:25:17 INFO avg loss: 2.088160245690229
+ 05/07/2023 05:28:59 INFO Quantizing mlp.dense_4h_to_h in layer 8/16...
+ 05/07/2023 05:29:05 INFO duration: 6.0966198444366455
+ 05/07/2023 05:29:05 INFO avg loss: 0.4126856014751398
+ 05/07/2023 05:29:26 INFO Start quantizing layer 9/16
+ 05/07/2023 05:29:59 INFO Quantizing attention.query_key_value in layer 9/16...
+ 05/07/2023 05:30:00 INFO duration: 0.971062183380127
+ 05/07/2023 05:30:00 INFO avg loss: 4.631909777689031
+ 05/07/2023 05:30:33 INFO Quantizing attention.dense in layer 9/16...
+ 05/07/2023 05:30:34 INFO duration: 0.7198226451873779
+ 05/07/2023 05:30:34 INFO avg loss: 0.2723473172091321
+ 05/07/2023 05:31:07 INFO Quantizing mlp.dense_h_to_4h in layer 9/16...
+ 05/07/2023 05:31:08 INFO duration: 1.0791394710540771
+ 05/07/2023 05:31:08 INFO avg loss: 2.0461749482078675
+ 05/07/2023 05:34:49 INFO Quantizing mlp.dense_4h_to_h in layer 9/16...
+ 05/07/2023 05:34:55 INFO duration: 5.983144044876099
+ 05/07/2023 05:34:55 INFO avg loss: 0.5113805541342186
+ 05/07/2023 05:35:16 INFO Start quantizing layer 10/16
+ 05/07/2023 05:35:49 INFO Quantizing attention.query_key_value in layer 10/16...
+ 05/07/2023 05:35:50 INFO duration: 0.9664998054504395
+ 05/07/2023 05:35:50 INFO avg loss: 7.197037864416933
+ 05/07/2023 05:36:23 INFO Quantizing attention.dense in layer 10/16...
+ 05/07/2023 05:36:24 INFO duration: 0.7181813716888428
+ 05/07/2023 05:36:24 INFO avg loss: 0.3427228673705405
+ 05/07/2023 05:36:57 INFO Quantizing mlp.dense_h_to_4h in layer 10/16...
+ 05/07/2023 05:36:58 INFO duration: 1.0781819820404053
+ 05/07/2023 05:36:58 INFO avg loss: 2.320328880041933
+ 05/07/2023 05:40:40 INFO Quantizing mlp.dense_4h_to_h in layer 10/16...
+ 05/07/2023 05:40:46 INFO duration: 6.027331829071045
+ 05/07/2023 05:40:46 INFO avg loss: 0.6135274056301584
+ 05/07/2023 05:41:07 INFO Start quantizing layer 11/16
+ 05/07/2023 05:41:40 INFO Quantizing attention.query_key_value in layer 11/16...
+ 05/07/2023 05:41:41 INFO duration: 0.9669804573059082
+ 05/07/2023 05:41:41 INFO avg loss: 7.502283845846645
+ 05/07/2023 05:42:14 INFO Quantizing attention.dense in layer 11/16...
+ 05/07/2023 05:42:14 INFO duration: 0.7167062759399414
+ 05/07/2023 05:42:14 INFO avg loss: 0.2933824760591387
+ 05/07/2023 05:42:47 INFO Quantizing mlp.dense_h_to_4h in layer 11/16...
+ 05/07/2023 05:42:48 INFO duration: 1.077958345413208
+ 05/07/2023 05:42:48 INFO avg loss: 2.6354988268769968
+ 05/07/2023 05:46:30 INFO Quantizing mlp.dense_4h_to_h in layer 11/16...
+ 05/07/2023 05:46:36 INFO duration: 5.968295335769653
+ 05/07/2023 05:46:36 INFO avg loss: 0.7737983809238551
+ 05/07/2023 05:46:57 INFO Start quantizing layer 12/16
+ 05/07/2023 05:47:30 INFO Quantizing attention.query_key_value in layer 12/16...
+ 05/07/2023 05:47:31 INFO duration: 0.9708924293518066
+ 05/07/2023 05:47:31 INFO avg loss: 6.875169520433972
+ 05/07/2023 05:48:04 INFO Quantizing attention.dense in layer 12/16...
+ 05/07/2023 05:48:05 INFO duration: 0.7233545780181885
+ 05/07/2023 05:48:05 INFO avg loss: 0.36776245897189497
+ 05/07/2023 05:48:38 INFO Quantizing mlp.dense_h_to_4h in layer 12/16...
+ 05/07/2023 05:48:39 INFO duration: 1.078718900680542
+ 05/07/2023 05:48:39 INFO avg loss: 2.9615547415801386
+ 05/07/2023 05:52:21 INFO Quantizing mlp.dense_4h_to_h in layer 12/16...
+ 05/07/2023 05:52:27 INFO duration: 6.078177452087402
+ 05/07/2023 05:52:27 INFO avg loss: 0.9158687896241015
+ 05/07/2023 05:52:48 INFO Start quantizing layer 13/16
+ 05/07/2023 05:53:21 INFO Quantizing attention.query_key_value in layer 13/16...
+ 05/07/2023 05:53:22 INFO duration: 0.9698812961578369
+ 05/07/2023 05:53:22 INFO avg loss: 5.93688639842918
+ 05/07/2023 05:53:54 INFO Quantizing attention.dense in layer 13/16...
+ 05/07/2023 05:53:55 INFO duration: 0.7205860614776611
+ 05/07/2023 05:53:55 INFO avg loss: 0.24467934637912672
+ 05/07/2023 05:54:28 INFO Quantizing mlp.dense_h_to_4h in layer 13/16...
+ 05/07/2023 05:54:29 INFO duration: 1.0801022052764893
+ 05/07/2023 05:54:29 INFO avg loss: 3.275802466054313
+ 05/07/2023 05:58:11 INFO Quantizing mlp.dense_4h_to_h in layer 13/16...
+ 05/07/2023 05:58:17 INFO duration: 6.09338641166687
+ 05/07/2023 05:58:17 INFO avg loss: 1.0767965265991082
+ 05/07/2023 05:58:38 INFO Start quantizing layer 14/16
+ 05/07/2023 05:59:11 INFO Quantizing attention.query_key_value in layer 14/16...
+ 05/07/2023 05:59:12 INFO duration: 0.9676227569580078
+ 05/07/2023 05:59:12 INFO avg loss: 6.686944638578275
+ 05/07/2023 05:59:45 INFO Quantizing attention.dense in layer 14/16...
+ 05/07/2023 05:59:46 INFO duration: 0.7196416854858398
+ 05/07/2023 05:59:46 INFO avg loss: 0.34242789661541534
+ 05/07/2023 06:00:19 INFO Quantizing mlp.dense_h_to_4h in layer 14/16...
+ 05/07/2023 06:00:20 INFO duration: 1.0829389095306396
+ 05/07/2023 06:00:20 INFO avg loss: 3.705307965588392
+ 05/07/2023 06:04:02 INFO Quantizing mlp.dense_4h_to_h in layer 14/16...
+ 05/07/2023 06:04:08 INFO duration: 6.013010263442993
+ 05/07/2023 06:04:08 INFO avg loss: 1.1975950458433173
+ 05/07/2023 06:04:29 INFO Start quantizing layer 15/16
+ 05/07/2023 06:05:02 INFO Quantizing attention.query_key_value in layer 15/16...
+ 05/07/2023 06:05:03 INFO duration: 0.9704198837280273
+ 05/07/2023 06:05:03 INFO avg loss: 7.567932973908413
+ 05/07/2023 06:05:36 INFO Quantizing attention.dense in layer 15/16...
+ 05/07/2023 06:05:37 INFO duration: 0.7222294807434082
+ 05/07/2023 06:05:37 INFO avg loss: 0.4468821890184039
+ 05/07/2023 06:06:10 INFO Quantizing mlp.dense_h_to_4h in layer 15/16...
+ 05/07/2023 06:06:11 INFO duration: 1.0775363445281982
+ 05/07/2023 06:06:11 INFO avg loss: 4.276716368393903
+ 05/07/2023 06:09:52 INFO Quantizing mlp.dense_4h_to_h in layer 15/16...
+ 05/07/2023 06:09:58 INFO duration: 6.097189664840698
+ 05/07/2023 06:09:58 INFO avg loss: 1.6799194205937167
+ 05/07/2023 06:10:19 INFO Start quantizing layer 16/16
+ 05/07/2023 06:10:52 INFO Quantizing attention.query_key_value in layer 16/16...
+ 05/07/2023 06:10:53 INFO duration: 0.9705617427825928
+ 05/07/2023 06:10:53 INFO avg loss: 7.100380016972843
+ 05/07/2023 06:11:26 INFO Quantizing attention.dense in layer 16/16...
+ 05/07/2023 06:11:27 INFO duration: 0.722510814666748
+ 05/07/2023 06:11:27 INFO avg loss: 0.24434113426330373
+ 05/07/2023 06:12:00 INFO Quantizing mlp.dense_h_to_4h in layer 16/16...
+ 05/07/2023 06:12:01 INFO duration: 1.0826246738433838
+ 05/07/2023 06:12:01 INFO avg loss: 4.788446298422524
+ 05/07/2023 06:15:43 INFO Quantizing mlp.dense_4h_to_h in layer 16/16...
+ 05/07/2023 06:15:49 INFO duration: 6.170569658279419
+ 05/07/2023 06:15:49 INFO avg loss: 1.7897084716536875
+ 05/07/2023 06:16:11 INFO Packing model...
+ 05/07/2023 06:16:11 INFO gpt_neox.layers.0.attention.dense
+ 05/07/2023 06:16:12 INFO gpt_neox.layers.0.attention.query_key_value
+ 05/07/2023 06:16:15 INFO gpt_neox.layers.0.mlp.dense_4h_to_h
+ 05/07/2023 06:16:18 INFO gpt_neox.layers.0.mlp.dense_h_to_4h
+ 05/07/2023 06:16:22 INFO gpt_neox.layers.1.attention.dense
+ 05/07/2023 06:16:23 INFO gpt_neox.layers.1.attention.query_key_value
+ 05/07/2023 06:16:26 INFO gpt_neox.layers.1.mlp.dense_4h_to_h
+ 05/07/2023 06:16:29 INFO gpt_neox.layers.1.mlp.dense_h_to_4h
+ 05/07/2023 06:16:33 INFO gpt_neox.layers.2.attention.dense
+ 05/07/2023 06:16:34 INFO gpt_neox.layers.2.attention.query_key_value
+ 05/07/2023 06:16:37 INFO gpt_neox.layers.2.mlp.dense_4h_to_h
+ 05/07/2023 06:16:40 INFO gpt_neox.layers.2.mlp.dense_h_to_4h
+ 05/07/2023 06:16:44 INFO gpt_neox.layers.3.attention.dense
+ 05/07/2023 06:16:45 INFO gpt_neox.layers.3.attention.query_key_value
+ 05/07/2023 06:16:48 INFO gpt_neox.layers.3.mlp.dense_4h_to_h
+ 05/07/2023 06:16:51 INFO gpt_neox.layers.3.mlp.dense_h_to_4h
+ 05/07/2023 06:16:56 INFO gpt_neox.layers.4.attention.dense
+ 05/07/2023 06:16:56 INFO gpt_neox.layers.4.attention.query_key_value
+ 05/07/2023 06:16:59 INFO gpt_neox.layers.4.mlp.dense_4h_to_h
+ 05/07/2023 06:17:03 INFO gpt_neox.layers.4.mlp.dense_h_to_4h
+ 05/07/2023 06:17:07 INFO gpt_neox.layers.5.attention.dense
+ 05/07/2023 06:17:08 INFO gpt_neox.layers.5.attention.query_key_value
+ 05/07/2023 06:17:11 INFO gpt_neox.layers.5.mlp.dense_4h_to_h
+ 05/07/2023 06:17:14 INFO gpt_neox.layers.5.mlp.dense_h_to_4h
+ 05/07/2023 06:17:18 INFO gpt_neox.layers.6.attention.dense
+ 05/07/2023 06:17:19 INFO gpt_neox.layers.6.attention.query_key_value
+ 05/07/2023 06:17:22 INFO gpt_neox.layers.6.mlp.dense_4h_to_h
+ 05/07/2023 06:17:25 INFO gpt_neox.layers.6.mlp.dense_h_to_4h
+ 05/07/2023 06:17:29 INFO gpt_neox.layers.7.attention.dense
+ 05/07/2023 06:17:30 INFO gpt_neox.layers.7.attention.query_key_value
+ 05/07/2023 06:17:33 INFO gpt_neox.layers.7.mlp.dense_4h_to_h
+ 05/07/2023 06:17:36 INFO gpt_neox.layers.7.mlp.dense_h_to_4h
+ 05/07/2023 06:17:40 INFO gpt_neox.layers.8.attention.dense
+ 05/07/2023 06:17:41 INFO gpt_neox.layers.8.attention.query_key_value
+ 05/07/2023 06:17:44 INFO gpt_neox.layers.8.mlp.dense_4h_to_h
+ 05/07/2023 06:17:47 INFO gpt_neox.layers.8.mlp.dense_h_to_4h
+ 05/07/2023 06:17:51 INFO gpt_neox.layers.9.attention.dense
+ 05/07/2023 06:17:52 INFO gpt_neox.layers.9.attention.query_key_value
+ 05/07/2023 06:17:55 INFO gpt_neox.layers.9.mlp.dense_4h_to_h
+ 05/07/2023 06:17:58 INFO gpt_neox.layers.9.mlp.dense_h_to_4h
+ 05/07/2023 06:18:02 INFO gpt_neox.layers.10.attention.dense
+ 05/07/2023 06:18:03 INFO gpt_neox.layers.10.attention.query_key_value
+ 05/07/2023 06:18:06 INFO gpt_neox.layers.10.mlp.dense_4h_to_h
+ 05/07/2023 06:18:09 INFO gpt_neox.layers.10.mlp.dense_h_to_4h
+ 05/07/2023 06:18:13 INFO gpt_neox.layers.11.attention.dense
+ 05/07/2023 06:18:14 INFO gpt_neox.layers.11.attention.query_key_value
+ 05/07/2023 06:18:17 INFO gpt_neox.layers.11.mlp.dense_4h_to_h
+ 05/07/2023 06:18:20 INFO gpt_neox.layers.11.mlp.dense_h_to_4h
+ 05/07/2023 06:18:24 INFO gpt_neox.layers.12.attention.dense
+ 05/07/2023 06:18:25 INFO gpt_neox.layers.12.attention.query_key_value
+ 05/07/2023 06:18:28 INFO gpt_neox.layers.12.mlp.dense_4h_to_h
+ 05/07/2023 06:18:31 INFO gpt_neox.layers.12.mlp.dense_h_to_4h
+ 05/07/2023 06:18:35 INFO gpt_neox.layers.13.attention.dense
+ 05/07/2023 06:18:36 INFO gpt_neox.layers.13.attention.query_key_value
+ 05/07/2023 06:18:39 INFO gpt_neox.layers.13.mlp.dense_4h_to_h
+ 05/07/2023 06:18:42 INFO gpt_neox.layers.13.mlp.dense_h_to_4h
+ 05/07/2023 06:18:46 INFO gpt_neox.layers.14.attention.dense
+ 05/07/2023 06:18:47 INFO gpt_neox.layers.14.attention.query_key_value
+ 05/07/2023 06:18:50 INFO gpt_neox.layers.14.mlp.dense_4h_to_h
+ 05/07/2023 06:18:53 INFO gpt_neox.layers.14.mlp.dense_h_to_4h
+ 05/07/2023 06:18:57 INFO gpt_neox.layers.15.attention.dense
+ 05/07/2023 06:18:58 INFO gpt_neox.layers.15.attention.query_key_value
+ 05/07/2023 06:19:01 INFO gpt_neox.layers.15.mlp.dense_4h_to_h
+ 05/07/2023 06:19:04 INFO gpt_neox.layers.15.mlp.dense_h_to_4h
+ 05/07/2023 06:19:08 INFO Model packed.
+ 05/07/2023 06:19:08 WARNING using autotune_warmup will move model to GPU, make sure you have enough VRAM to load the whole model.
+ 05/07/2023 06:19:09 INFO Found 4 unique KN Linear values.
+ 05/07/2023 06:19:09 INFO Warming up autotune cache ...
+ 05/07/2023 06:19:58 INFO Done! Saving..
+ 05/07/2023 06:20:05 INFO Saved. Size of the model file(s): 10063.64 MB
+ 05/07/2023 06:20:05 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+ 05/07/2023 06:20:05 INFO embed_out not been quantized, will be ignored when make_quant.
+ 05/07/2023 06:20:06 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+ 05/07/2023 06:20:06 INFO Found 4 unique KN Linear values.
+ 05/07/2023 06:20:06 INFO Warming up autotune cache ...
+ 05/07/2023 06:20:07 INFO Sample output: ('Because woodchucks (or squirrels, as they\'re also known) are "the chink[e] '
+ 'of wood."')
+ 05/07/2023 06:20:07 INFO GPU memory usage during test inference: 4.61 GB
+ 05/07/2023 06:20:08 WARNING No such comm: d349e6339e5442e4a3286af931f0699f
+ 05/07/2023 06:20:08 WARNING No such comm: 9374387013794a8bab6ba19cace86d58
+ 05/07/2023 06:20:08 WARNING No such comm: bf152b67bcc04b93863ac311ea4df76a
+ 05/07/2023 06:20:08 WARNING No such comm: 118ccfc8fe874373ae03f8132fb8c258
+ 05/07/2023 06:20:08 WARNING No such comm: 9d85d31e378c44ce9119ead8b83e7556
+ 05/07/2023 06:20:08 WARNING No such comm: c8c5130cae894895a66be12fe834c673
+ 05/07/2023 06:20:08 WARNING No such comm: 237ea212dbd74befad2f34ba2161307d
+ 05/07/2023 06:20:08 WARNING No such comm: 86eda75ae855461b8f5c1ae5b3a83cec
+ 05/07/2023 06:20:08 WARNING No such comm: 63731c4e51f0433fbf85712c08c3d4bf
+ 05/07/2023 06:20:08 WARNING No such comm: 2079a099466341488fc017f30e9359a8
+ 05/07/2023 06:20:08 WARNING No such comm: 99fa75439d3d47c0a6a5b7c25c526718
+ 05/07/2023 06:20:09 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+ 05/07/2023 06:20:09 INFO embed_out not been quantized, will be ignored when make_quant.
+ 05/07/2023 06:20:09 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+ 05/07/2023 06:20:10 INFO Found 4 unique KN Linear values.
+ 05/07/2023 06:20:10 INFO Warming up autotune cache ...
+ 05/07/2023 06:31:04 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+ 05/07/2023 06:31:04 INFO embed_out not been quantized, will be ignored when make_quant.
+ 05/07/2023 06:31:04 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+ 05/07/2023 06:31:05 INFO Found 4 unique KN Linear values.
+ 05/07/2023 06:31:05 INFO Warming up autotune cache ...
+ 05/07/2023 06:31:46 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+ 05/07/2023 06:31:46 INFO embed_out not been quantized, will be ignored when make_quant.
+ 05/07/2023 06:31:46 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+ 05/07/2023 06:31:46 INFO Found 4 unique KN Linear values.
+ 05/07/2023 06:31:46 INFO Warming up autotune cache ...
+ 05/07/2023 06:32:16 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+ 05/07/2023 06:32:16 INFO embed_out not been quantized, will be ignored when make_quant.
+ 05/07/2023 06:32:16 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+ 05/07/2023 06:32:16 INFO Found 4 unique KN Linear values.
+ 05/07/2023 06:32:16 INFO Warming up autotune cache ...
+ 05/07/2023 06:32:42 WARNING use_triton will force moving the whole model to GPU, make sure you have enough VRAM.
+ 05/07/2023 06:32:42 INFO embed_out not been quantized, will be ignored when make_quant.
+ 05/07/2023 06:32:42 WARNING The safetensors archive passed at /home/pszemraj/workspace/misc-train/quantization/quantized-models/stablelm-7b-sft-v7-epoch-3-4bit-128g/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
+ 05/07/2023 06:32:42 INFO Found 4 unique KN Linear values.
+ 05/07/2023 06:32:42 INFO Warming up autotune cache ...
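For context, the log above shows a GPTQ pass over the 16 GPT-NeoX layers (attention.query_key_value, attention.dense, mlp.dense_h_to_4h, mlp.dense_4h_to_h in each layer), followed by packing, an autotune warm-up, and a test generation. The driving script is not part of this commit, so the following is only a minimal sketch of how such a run is typically set up with the auto-gptq library, using oasst1 (the cached dataset in the first log line) for calibration; the sample size, sequence length, and output directory are placeholders.

```python
from datasets import load_dataset
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

base_model = "openassistant/stablelm-7b-sft-v7-epoch-3"  # per config.json below
out_dir = "stablelm-7b-sft-v7-epoch-3-4bit-128g"         # matches the save path in the log

# Settings mirror quantize_config.json in this commit.
quantize_config = BaseQuantizeConfig(
    bits=4, group_size=128, desc_act=True, damp_percent=0.01
)

tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoGPTQForCausalLM.from_pretrained(base_model, quantize_config)

# Calibration examples: a small sample of oasst1 messages, tokenized to
# input_ids/attention_mask dicts as expected by model.quantize().
texts = load_dataset("OpenAssistant/oasst1", split="train").shuffle(seed=0)["text"][:128]
examples = [
    tokenizer(t, truncation=True, max_length=2048, return_tensors="pt") for t in texts
]

model.quantize(examples)  # layer-by-layer GPTQ, producing the per-module losses logged above
model.save_quantized(out_dir, use_safetensors=True)  # writes gptq_model-4bit-128g.safetensors
```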
config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "_name_or_path": "openassistant/stablelm-7b-sft-v7-epoch-3",
+ "architectures": [
+ "GPTNeoXForCausalLM"
+ ],
+ "bos_token_id": 0,
+ "eos_token_id": 0,
+ "hidden_act": "gelu",
+ "hidden_size": 6144,
+ "initializer_range": 0.02,
+ "intermediate_size": 24576,
+ "layer_norm_eps": 1e-05,
+ "max_length": 4096,
+ "max_position_embeddings": 4096,
+ "model_type": "gpt_neox",
+ "num_attention_heads": 48,
+ "num_hidden_layers": 16,
+ "rotary_emb_base": 10000,
+ "rotary_pct": 0.25,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.28.1",
+ "use_cache": true,
+ "use_parallel_residual": true,
+ "vocab_size": 50288
+ }
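As a sanity check on the config above (hidden_size 6144, intermediate_size 24576, 16 layers, untied embeddings over a 50288-token vocabulary), the weight matrices alone come to roughly 7.9B parameters, which is what the "7b" in the model name refers to. A quick back-of-the-envelope, ignoring biases and layer norms:

```python
hidden, inter, layers, vocab = 6144, 24576, 16, 50288  # values from config.json

per_layer = (
    hidden * 3 * hidden  # attention.query_key_value
    + hidden * hidden    # attention.dense
    + hidden * inter     # mlp.dense_h_to_4h
    + inter * hidden     # mlp.dense_4h_to_h
)
embeddings = 2 * vocab * hidden  # embed_in + embed_out ("tie_word_embeddings": false)

total = layers * per_layer + embeddings
print(f"{total / 1e9:.2f}B weight parameters")  # ≈ 7.87B
```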
generation_config.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "do_sample": true,
+ "eos_token_id": 0,
+ "from_model_config": false,
+ "max_new_tokens": 128,
+ "pad_token_id": 0,
+ "penalty_alpha": 0.6,
+ "temperature": 0.9,
+ "top_k": 4,
+ "transformers_version": "4.28.1"
+ }
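These are just the default generation settings shipped with the checkpoint (sampling with temperature 0.9 and top_k 4, up to 128 new tokens, with penalty_alpha also recorded). A minimal sketch of inspecting and reusing them with transformers; the directory name is a placeholder for wherever this repo is downloaded:

```python
from transformers import GenerationConfig

# Load the defaults committed in generation_config.json.
gen_cfg = GenerationConfig.from_pretrained("stablelm-7b-sft-v7-epoch-3-4bit-128g")
print(gen_cfg.temperature, gen_cfg.top_k, gen_cfg.penalty_alpha)  # 0.9 4 0.6

# model.generate(...) picks these up automatically; any field can be
# overridden per call, e.g. model.generate(**inputs, temperature=0.7).
```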
gptq_model-4bit-128g.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5904ac19352a155dd3947ce06d4c282e7c673a071eeab48fba35533fc9b467d8
+ size 5275239687
gptq_model-4bit-128g.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c9c9abe092b22d056211ae80889389f31986ae264df551c0dac3b27d28e50657
+ size 5275134944
quantize_config.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "bits": 4,
+ "group_size": 128,
+ "damp_percent": 0.01,
+ "desc_act": true,
+ "sym": true,
+ "true_sequential": true
+ }
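quantize_config.json is what auto-gptq reads back when reloading the checkpoint: 4-bit weights, group size 128, act-order (desc_act) enabled, symmetric quantization, sequential layer processing, and 1% dampening. A minimal sketch of loading the quantized weights from this commit for inference; the directory name and prompt are placeholders:

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

model_dir = "stablelm-7b-sft-v7-epoch-3-4bit-128g"  # wherever this repo is cloned/downloaded

tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoGPTQForCausalLM.from_quantized(
    model_dir,
    use_safetensors=True,  # loads gptq_model-4bit-128g.safetensors
    device="cuda:0",
    use_triton=False,      # the log above used the Triton kernels; CUDA kernels are the simpler default
)

prompt = "<|prompter|>Why can a woodchuck chuck wood?<|endoftext|><|assistant|>"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=128)[0]))
```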
special_tokens_map.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "additional_special_tokens": [
+ "<|prefix_end|>",
+ "<|prefix_begin|>",
+ "<|assistant|>",
+ "<|prompter|>",
+ "<|system|>"
+ ],
+ "bos_token": "<|endoftext|>",
+ "eos_token": "<|endoftext|>",
+ "pad_token": "<|padding|>",
+ "sep_token": "<|endoftext|>",
+ "unk_token": "<|endoftext|>"
+ }
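These special tokens carry the OpenAssistant chat format of the underlying SFT model. The exact prompting template is not part of this commit, but a single-turn prompt built from these tokens typically looks like the following sketch:

```python
# <|system|>...<|endoftext|> may optionally be prepended for a system message;
# generation stops at <|endoftext|>, which doubles as bos/eos/sep/unk above.
prompt = (
    "<|prompter|>How much wood would a woodchuck chuck?<|endoftext|>"
    "<|assistant|>"
)
```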
tokenizer.json ADDED
The diff for this file is too large to render.
 
tokenizer_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "add_prefix_space": false,
+ "bos_token": "<|endoftext|>",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|endoftext|>",
+ "model_max_length": 4096,
+ "tokenizer_class": "GPTNeoXTokenizer",
+ "unk_token": "<|endoftext|>"
+ }