100rab25 commited on
Commit
49d4fd8
1 Parent(s): 9cfcf2a

End of training

Browse files
Files changed (5) hide show
  1. README.md +7 -7
  2. all_results.json +16 -0
  3. eval_results.json +11 -0
  4. train_results.json +8 -0
  5. trainer_state.json +926 -0
README.md CHANGED
@@ -26,16 +26,16 @@ model-index:
26
  metrics:
27
  - name: Accuracy
28
  type: accuracy
29
- value: 0.9969230769230769
30
  - name: Precision
31
  type: precision
32
- value: 0.9888888888888889
33
  - name: Recall
34
  type: recall
35
  value: 1.0
36
  - name: F1
37
  type: f1
38
- value: 0.9944134078212291
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -45,11 +45,11 @@ should probably proofread and complete it, then remove this comment. -->
45
 
46
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
47
  It achieves the following results on the evaluation set:
48
- - Loss: 0.0075
49
- - Accuracy: 0.9969
50
- - Precision: 0.9889
51
  - Recall: 1.0
52
- - F1: 0.9944
53
 
54
  ## Model description
55
 
 
26
  metrics:
27
  - name: Accuracy
28
  type: accuracy
29
+ value: 1.0
30
  - name: Precision
31
  type: precision
32
+ value: 1.0
33
  - name: Recall
34
  type: recall
35
  value: 1.0
36
  - name: F1
37
  type: f1
38
+ value: 1.0
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
45
 
46
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
47
  It achieves the following results on the evaluation set:
48
+ - Loss: 0.0072
49
+ - Accuracy: 1.0
50
+ - Precision: 1.0
51
  - Recall: 1.0
52
+ - F1: 1.0
53
 
54
  ## Model description
55
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 1.0,
4
+ "eval_f1": 1.0,
5
+ "eval_loss": 0.00724475271999836,
6
+ "eval_precision": 1.0,
7
+ "eval_recall": 1.0,
8
+ "eval_runtime": 0.6251,
9
+ "eval_samples_per_second": 519.879,
10
+ "eval_steps_per_second": 17.596,
11
+ "total_flos": 1.4530811161131418e+18,
12
+ "train_loss": 0.0580909106111073,
13
+ "train_runtime": 291.8902,
14
+ "train_samples_per_second": 200.281,
15
+ "train_steps_per_second": 1.576
16
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 1.0,
4
+ "eval_f1": 1.0,
5
+ "eval_loss": 0.00724475271999836,
6
+ "eval_precision": 1.0,
7
+ "eval_recall": 1.0,
8
+ "eval_runtime": 0.6251,
9
+ "eval_samples_per_second": 519.879,
10
+ "eval_steps_per_second": 17.596
11
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "total_flos": 1.4530811161131418e+18,
4
+ "train_loss": 0.0580909106111073,
5
+ "train_runtime": 291.8902,
6
+ "train_samples_per_second": 200.281,
7
+ "train_steps_per_second": 1.576
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,926 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "bridalMakeupClassifier_binary/checkpoint-414",
4
+ "epoch": 20.0,
5
+ "eval_steps": 500,
6
+ "global_step": 460,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.21739130434782608,
13
+ "grad_norm": 7.482993125915527,
14
+ "learning_rate": 5.4347826086956525e-06,
15
+ "loss": 0.7413,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.43478260869565216,
20
+ "grad_norm": 5.045105934143066,
21
+ "learning_rate": 1.0869565217391305e-05,
22
+ "loss": 0.6285,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.6521739130434783,
27
+ "grad_norm": 3.057001829147339,
28
+ "learning_rate": 1.630434782608696e-05,
29
+ "loss": 0.4579,
30
+ "step": 15
31
+ },
32
+ {
33
+ "epoch": 0.8695652173913043,
34
+ "grad_norm": 2.394951581954956,
35
+ "learning_rate": 2.173913043478261e-05,
36
+ "loss": 0.2966,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 1.0,
41
+ "eval_accuracy": 0.9661538461538461,
42
+ "eval_f1": 0.9378531073446328,
43
+ "eval_loss": 0.12898202240467072,
44
+ "eval_precision": 0.9431818181818182,
45
+ "eval_recall": 0.9325842696629213,
46
+ "eval_runtime": 0.6116,
47
+ "eval_samples_per_second": 531.418,
48
+ "eval_steps_per_second": 17.986,
49
+ "step": 23
50
+ },
51
+ {
52
+ "epoch": 1.0869565217391304,
53
+ "grad_norm": 2.818490982055664,
54
+ "learning_rate": 2.7173913043478262e-05,
55
+ "loss": 0.1788,
56
+ "step": 25
57
+ },
58
+ {
59
+ "epoch": 1.3043478260869565,
60
+ "grad_norm": 2.19661545753479,
61
+ "learning_rate": 3.260869565217392e-05,
62
+ "loss": 0.1157,
63
+ "step": 30
64
+ },
65
+ {
66
+ "epoch": 1.5217391304347827,
67
+ "grad_norm": 1.980846643447876,
68
+ "learning_rate": 3.804347826086957e-05,
69
+ "loss": 0.077,
70
+ "step": 35
71
+ },
72
+ {
73
+ "epoch": 1.7391304347826086,
74
+ "grad_norm": 1.7503933906555176,
75
+ "learning_rate": 4.347826086956522e-05,
76
+ "loss": 0.076,
77
+ "step": 40
78
+ },
79
+ {
80
+ "epoch": 1.9565217391304348,
81
+ "grad_norm": 4.313005447387695,
82
+ "learning_rate": 4.891304347826087e-05,
83
+ "loss": 0.1233,
84
+ "step": 45
85
+ },
86
+ {
87
+ "epoch": 2.0,
88
+ "eval_accuracy": 0.9876923076923076,
89
+ "eval_f1": 0.9777777777777779,
90
+ "eval_loss": 0.04074087738990784,
91
+ "eval_precision": 0.967032967032967,
92
+ "eval_recall": 0.9887640449438202,
93
+ "eval_runtime": 0.614,
94
+ "eval_samples_per_second": 529.309,
95
+ "eval_steps_per_second": 17.915,
96
+ "step": 46
97
+ },
98
+ {
99
+ "epoch": 2.1739130434782608,
100
+ "grad_norm": 2.5787644386291504,
101
+ "learning_rate": 4.9516908212560386e-05,
102
+ "loss": 0.0834,
103
+ "step": 50
104
+ },
105
+ {
106
+ "epoch": 2.391304347826087,
107
+ "grad_norm": 2.1645395755767822,
108
+ "learning_rate": 4.891304347826087e-05,
109
+ "loss": 0.0538,
110
+ "step": 55
111
+ },
112
+ {
113
+ "epoch": 2.608695652173913,
114
+ "grad_norm": 2.0289194583892822,
115
+ "learning_rate": 4.830917874396135e-05,
116
+ "loss": 0.0749,
117
+ "step": 60
118
+ },
119
+ {
120
+ "epoch": 2.8260869565217392,
121
+ "grad_norm": 1.7418752908706665,
122
+ "learning_rate": 4.770531400966184e-05,
123
+ "loss": 0.0469,
124
+ "step": 65
125
+ },
126
+ {
127
+ "epoch": 3.0,
128
+ "eval_accuracy": 0.9815384615384616,
129
+ "eval_f1": 0.967391304347826,
130
+ "eval_loss": 0.059426549822092056,
131
+ "eval_precision": 0.9368421052631579,
132
+ "eval_recall": 1.0,
133
+ "eval_runtime": 0.6222,
134
+ "eval_samples_per_second": 522.372,
135
+ "eval_steps_per_second": 17.68,
136
+ "step": 69
137
+ },
138
+ {
139
+ "epoch": 3.0434782608695654,
140
+ "grad_norm": 4.78839635848999,
141
+ "learning_rate": 4.710144927536232e-05,
142
+ "loss": 0.0729,
143
+ "step": 70
144
+ },
145
+ {
146
+ "epoch": 3.260869565217391,
147
+ "grad_norm": 2.100724697113037,
148
+ "learning_rate": 4.64975845410628e-05,
149
+ "loss": 0.0588,
150
+ "step": 75
151
+ },
152
+ {
153
+ "epoch": 3.4782608695652173,
154
+ "grad_norm": 2.4715771675109863,
155
+ "learning_rate": 4.589371980676328e-05,
156
+ "loss": 0.0537,
157
+ "step": 80
158
+ },
159
+ {
160
+ "epoch": 3.6956521739130435,
161
+ "grad_norm": 2.045220375061035,
162
+ "learning_rate": 4.528985507246377e-05,
163
+ "loss": 0.0451,
164
+ "step": 85
165
+ },
166
+ {
167
+ "epoch": 3.9130434782608696,
168
+ "grad_norm": 2.7747068405151367,
169
+ "learning_rate": 4.4685990338164255e-05,
170
+ "loss": 0.0394,
171
+ "step": 90
172
+ },
173
+ {
174
+ "epoch": 4.0,
175
+ "eval_accuracy": 0.9876923076923076,
176
+ "eval_f1": 0.9777777777777779,
177
+ "eval_loss": 0.055703382939100266,
178
+ "eval_precision": 0.967032967032967,
179
+ "eval_recall": 0.9887640449438202,
180
+ "eval_runtime": 0.6278,
181
+ "eval_samples_per_second": 517.661,
182
+ "eval_steps_per_second": 17.521,
183
+ "step": 92
184
+ },
185
+ {
186
+ "epoch": 4.130434782608695,
187
+ "grad_norm": 1.9641977548599243,
188
+ "learning_rate": 4.408212560386474e-05,
189
+ "loss": 0.0728,
190
+ "step": 95
191
+ },
192
+ {
193
+ "epoch": 4.3478260869565215,
194
+ "grad_norm": 3.0312821865081787,
195
+ "learning_rate": 4.347826086956522e-05,
196
+ "loss": 0.0611,
197
+ "step": 100
198
+ },
199
+ {
200
+ "epoch": 4.565217391304348,
201
+ "grad_norm": 2.8319077491760254,
202
+ "learning_rate": 4.2874396135265707e-05,
203
+ "loss": 0.0467,
204
+ "step": 105
205
+ },
206
+ {
207
+ "epoch": 4.782608695652174,
208
+ "grad_norm": 0.7883936762809753,
209
+ "learning_rate": 4.2270531400966186e-05,
210
+ "loss": 0.0433,
211
+ "step": 110
212
+ },
213
+ {
214
+ "epoch": 5.0,
215
+ "grad_norm": 3.4693968296051025,
216
+ "learning_rate": 4.166666666666667e-05,
217
+ "loss": 0.0909,
218
+ "step": 115
219
+ },
220
+ {
221
+ "epoch": 5.0,
222
+ "eval_accuracy": 0.9907692307692307,
223
+ "eval_f1": 0.9834254143646408,
224
+ "eval_loss": 0.04008618742227554,
225
+ "eval_precision": 0.967391304347826,
226
+ "eval_recall": 1.0,
227
+ "eval_runtime": 0.6227,
228
+ "eval_samples_per_second": 521.945,
229
+ "eval_steps_per_second": 17.666,
230
+ "step": 115
231
+ },
232
+ {
233
+ "epoch": 5.217391304347826,
234
+ "grad_norm": 2.560603618621826,
235
+ "learning_rate": 4.106280193236715e-05,
236
+ "loss": 0.0683,
237
+ "step": 120
238
+ },
239
+ {
240
+ "epoch": 5.434782608695652,
241
+ "grad_norm": 1.258954644203186,
242
+ "learning_rate": 4.045893719806764e-05,
243
+ "loss": 0.045,
244
+ "step": 125
245
+ },
246
+ {
247
+ "epoch": 5.6521739130434785,
248
+ "grad_norm": 1.6040618419647217,
249
+ "learning_rate": 3.985507246376812e-05,
250
+ "loss": 0.0334,
251
+ "step": 130
252
+ },
253
+ {
254
+ "epoch": 5.869565217391305,
255
+ "grad_norm": 2.980376720428467,
256
+ "learning_rate": 3.92512077294686e-05,
257
+ "loss": 0.05,
258
+ "step": 135
259
+ },
260
+ {
261
+ "epoch": 6.0,
262
+ "eval_accuracy": 0.9876923076923076,
263
+ "eval_f1": 0.9777777777777779,
264
+ "eval_loss": 0.025248104706406593,
265
+ "eval_precision": 0.967032967032967,
266
+ "eval_recall": 0.9887640449438202,
267
+ "eval_runtime": 0.6255,
268
+ "eval_samples_per_second": 519.608,
269
+ "eval_steps_per_second": 17.587,
270
+ "step": 138
271
+ },
272
+ {
273
+ "epoch": 6.086956521739131,
274
+ "grad_norm": 1.547420859336853,
275
+ "learning_rate": 3.864734299516908e-05,
276
+ "loss": 0.0215,
277
+ "step": 140
278
+ },
279
+ {
280
+ "epoch": 6.304347826086957,
281
+ "grad_norm": 1.0269052982330322,
282
+ "learning_rate": 3.804347826086957e-05,
283
+ "loss": 0.0513,
284
+ "step": 145
285
+ },
286
+ {
287
+ "epoch": 6.521739130434782,
288
+ "grad_norm": 1.4050897359848022,
289
+ "learning_rate": 3.743961352657005e-05,
290
+ "loss": 0.0324,
291
+ "step": 150
292
+ },
293
+ {
294
+ "epoch": 6.739130434782608,
295
+ "grad_norm": 2.9087464809417725,
296
+ "learning_rate": 3.6835748792270534e-05,
297
+ "loss": 0.0246,
298
+ "step": 155
299
+ },
300
+ {
301
+ "epoch": 6.956521739130435,
302
+ "grad_norm": 2.945462942123413,
303
+ "learning_rate": 3.6231884057971014e-05,
304
+ "loss": 0.0451,
305
+ "step": 160
306
+ },
307
+ {
308
+ "epoch": 7.0,
309
+ "eval_accuracy": 0.9876923076923076,
310
+ "eval_f1": 0.9772727272727273,
311
+ "eval_loss": 0.02794536016881466,
312
+ "eval_precision": 0.9885057471264368,
313
+ "eval_recall": 0.9662921348314607,
314
+ "eval_runtime": 0.638,
315
+ "eval_samples_per_second": 509.374,
316
+ "eval_steps_per_second": 17.24,
317
+ "step": 161
318
+ },
319
+ {
320
+ "epoch": 7.173913043478261,
321
+ "grad_norm": 0.28256598114967346,
322
+ "learning_rate": 3.56280193236715e-05,
323
+ "loss": 0.0499,
324
+ "step": 165
325
+ },
326
+ {
327
+ "epoch": 7.391304347826087,
328
+ "grad_norm": 2.3740286827087402,
329
+ "learning_rate": 3.502415458937198e-05,
330
+ "loss": 0.0358,
331
+ "step": 170
332
+ },
333
+ {
334
+ "epoch": 7.608695652173913,
335
+ "grad_norm": 1.025823950767517,
336
+ "learning_rate": 3.4420289855072465e-05,
337
+ "loss": 0.0306,
338
+ "step": 175
339
+ },
340
+ {
341
+ "epoch": 7.826086956521739,
342
+ "grad_norm": 0.7179997563362122,
343
+ "learning_rate": 3.381642512077295e-05,
344
+ "loss": 0.0231,
345
+ "step": 180
346
+ },
347
+ {
348
+ "epoch": 8.0,
349
+ "eval_accuracy": 0.9938461538461538,
350
+ "eval_f1": 0.9888888888888888,
351
+ "eval_loss": 0.02782590501010418,
352
+ "eval_precision": 0.978021978021978,
353
+ "eval_recall": 1.0,
354
+ "eval_runtime": 0.6231,
355
+ "eval_samples_per_second": 521.596,
356
+ "eval_steps_per_second": 17.654,
357
+ "step": 184
358
+ },
359
+ {
360
+ "epoch": 8.043478260869565,
361
+ "grad_norm": 0.5594707131385803,
362
+ "learning_rate": 3.321256038647343e-05,
363
+ "loss": 0.024,
364
+ "step": 185
365
+ },
366
+ {
367
+ "epoch": 8.26086956521739,
368
+ "grad_norm": 2.8453502655029297,
369
+ "learning_rate": 3.260869565217392e-05,
370
+ "loss": 0.03,
371
+ "step": 190
372
+ },
373
+ {
374
+ "epoch": 8.478260869565217,
375
+ "grad_norm": 1.5159822702407837,
376
+ "learning_rate": 3.2004830917874396e-05,
377
+ "loss": 0.0203,
378
+ "step": 195
379
+ },
380
+ {
381
+ "epoch": 8.695652173913043,
382
+ "grad_norm": 2.250065326690674,
383
+ "learning_rate": 3.140096618357488e-05,
384
+ "loss": 0.0282,
385
+ "step": 200
386
+ },
387
+ {
388
+ "epoch": 8.91304347826087,
389
+ "grad_norm": 3.3784470558166504,
390
+ "learning_rate": 3.079710144927536e-05,
391
+ "loss": 0.0404,
392
+ "step": 205
393
+ },
394
+ {
395
+ "epoch": 9.0,
396
+ "eval_accuracy": 0.9876923076923076,
397
+ "eval_f1": 0.9775280898876404,
398
+ "eval_loss": 0.025601999834179878,
399
+ "eval_precision": 0.9775280898876404,
400
+ "eval_recall": 0.9775280898876404,
401
+ "eval_runtime": 0.6288,
402
+ "eval_samples_per_second": 516.888,
403
+ "eval_steps_per_second": 17.495,
404
+ "step": 207
405
+ },
406
+ {
407
+ "epoch": 9.130434782608695,
408
+ "grad_norm": 4.156078815460205,
409
+ "learning_rate": 3.0193236714975848e-05,
410
+ "loss": 0.0135,
411
+ "step": 210
412
+ },
413
+ {
414
+ "epoch": 9.347826086956522,
415
+ "grad_norm": 1.2355906963348389,
416
+ "learning_rate": 2.9589371980676327e-05,
417
+ "loss": 0.0377,
418
+ "step": 215
419
+ },
420
+ {
421
+ "epoch": 9.565217391304348,
422
+ "grad_norm": 4.165219783782959,
423
+ "learning_rate": 2.8985507246376814e-05,
424
+ "loss": 0.0354,
425
+ "step": 220
426
+ },
427
+ {
428
+ "epoch": 9.782608695652174,
429
+ "grad_norm": 1.0178390741348267,
430
+ "learning_rate": 2.8381642512077293e-05,
431
+ "loss": 0.0331,
432
+ "step": 225
433
+ },
434
+ {
435
+ "epoch": 10.0,
436
+ "grad_norm": 4.175224304199219,
437
+ "learning_rate": 2.777777777777778e-05,
438
+ "loss": 0.0297,
439
+ "step": 230
440
+ },
441
+ {
442
+ "epoch": 10.0,
443
+ "eval_accuracy": 0.9907692307692307,
444
+ "eval_f1": 0.9832402234636872,
445
+ "eval_loss": 0.02601032890379429,
446
+ "eval_precision": 0.9777777777777777,
447
+ "eval_recall": 0.9887640449438202,
448
+ "eval_runtime": 0.6221,
449
+ "eval_samples_per_second": 522.441,
450
+ "eval_steps_per_second": 17.683,
451
+ "step": 230
452
+ },
453
+ {
454
+ "epoch": 10.217391304347826,
455
+ "grad_norm": 0.14615581929683685,
456
+ "learning_rate": 2.7173913043478262e-05,
457
+ "loss": 0.009,
458
+ "step": 235
459
+ },
460
+ {
461
+ "epoch": 10.434782608695652,
462
+ "grad_norm": 5.359668731689453,
463
+ "learning_rate": 2.6570048309178748e-05,
464
+ "loss": 0.0176,
465
+ "step": 240
466
+ },
467
+ {
468
+ "epoch": 10.652173913043478,
469
+ "grad_norm": 0.387389212846756,
470
+ "learning_rate": 2.5966183574879227e-05,
471
+ "loss": 0.0251,
472
+ "step": 245
473
+ },
474
+ {
475
+ "epoch": 10.869565217391305,
476
+ "grad_norm": 1.7574446201324463,
477
+ "learning_rate": 2.5362318840579714e-05,
478
+ "loss": 0.0327,
479
+ "step": 250
480
+ },
481
+ {
482
+ "epoch": 11.0,
483
+ "eval_accuracy": 0.9938461538461538,
484
+ "eval_f1": 0.9888888888888888,
485
+ "eval_loss": 0.02298262156546116,
486
+ "eval_precision": 0.978021978021978,
487
+ "eval_recall": 1.0,
488
+ "eval_runtime": 0.63,
489
+ "eval_samples_per_second": 515.887,
490
+ "eval_steps_per_second": 17.461,
491
+ "step": 253
492
+ },
493
+ {
494
+ "epoch": 11.08695652173913,
495
+ "grad_norm": 1.124037504196167,
496
+ "learning_rate": 2.4758454106280193e-05,
497
+ "loss": 0.0397,
498
+ "step": 255
499
+ },
500
+ {
501
+ "epoch": 11.304347826086957,
502
+ "grad_norm": 2.474566698074341,
503
+ "learning_rate": 2.4154589371980676e-05,
504
+ "loss": 0.0427,
505
+ "step": 260
506
+ },
507
+ {
508
+ "epoch": 11.521739130434783,
509
+ "grad_norm": 1.6105717420578003,
510
+ "learning_rate": 2.355072463768116e-05,
511
+ "loss": 0.0157,
512
+ "step": 265
513
+ },
514
+ {
515
+ "epoch": 11.73913043478261,
516
+ "grad_norm": 0.6210281252861023,
517
+ "learning_rate": 2.294685990338164e-05,
518
+ "loss": 0.0297,
519
+ "step": 270
520
+ },
521
+ {
522
+ "epoch": 11.956521739130435,
523
+ "grad_norm": 2.6540639400482178,
524
+ "learning_rate": 2.2342995169082127e-05,
525
+ "loss": 0.0221,
526
+ "step": 275
527
+ },
528
+ {
529
+ "epoch": 12.0,
530
+ "eval_accuracy": 0.9969230769230769,
531
+ "eval_f1": 0.9944134078212291,
532
+ "eval_loss": 0.01398832444101572,
533
+ "eval_precision": 0.9888888888888889,
534
+ "eval_recall": 1.0,
535
+ "eval_runtime": 0.6237,
536
+ "eval_samples_per_second": 521.045,
537
+ "eval_steps_per_second": 17.635,
538
+ "step": 276
539
+ },
540
+ {
541
+ "epoch": 12.173913043478262,
542
+ "grad_norm": 2.6183152198791504,
543
+ "learning_rate": 2.173913043478261e-05,
544
+ "loss": 0.0267,
545
+ "step": 280
546
+ },
547
+ {
548
+ "epoch": 12.391304347826088,
549
+ "grad_norm": 0.8627565503120422,
550
+ "learning_rate": 2.1135265700483093e-05,
551
+ "loss": 0.0195,
552
+ "step": 285
553
+ },
554
+ {
555
+ "epoch": 12.608695652173914,
556
+ "grad_norm": 0.576085090637207,
557
+ "learning_rate": 2.0531400966183576e-05,
558
+ "loss": 0.0322,
559
+ "step": 290
560
+ },
561
+ {
562
+ "epoch": 12.826086956521738,
563
+ "grad_norm": 3.6716318130493164,
564
+ "learning_rate": 1.992753623188406e-05,
565
+ "loss": 0.0294,
566
+ "step": 295
567
+ },
568
+ {
569
+ "epoch": 13.0,
570
+ "eval_accuracy": 0.9969230769230769,
571
+ "eval_f1": 0.9944134078212291,
572
+ "eval_loss": 0.01057437900453806,
573
+ "eval_precision": 0.9888888888888889,
574
+ "eval_recall": 1.0,
575
+ "eval_runtime": 0.6324,
576
+ "eval_samples_per_second": 513.948,
577
+ "eval_steps_per_second": 17.395,
578
+ "step": 299
579
+ },
580
+ {
581
+ "epoch": 13.043478260869565,
582
+ "grad_norm": 0.19212637841701508,
583
+ "learning_rate": 1.932367149758454e-05,
584
+ "loss": 0.0298,
585
+ "step": 300
586
+ },
587
+ {
588
+ "epoch": 13.26086956521739,
589
+ "grad_norm": 1.9307483434677124,
590
+ "learning_rate": 1.8719806763285024e-05,
591
+ "loss": 0.0231,
592
+ "step": 305
593
+ },
594
+ {
595
+ "epoch": 13.478260869565217,
596
+ "grad_norm": 2.444979667663574,
597
+ "learning_rate": 1.8115942028985507e-05,
598
+ "loss": 0.0201,
599
+ "step": 310
600
+ },
601
+ {
602
+ "epoch": 13.695652173913043,
603
+ "grad_norm": 2.9044065475463867,
604
+ "learning_rate": 1.751207729468599e-05,
605
+ "loss": 0.0243,
606
+ "step": 315
607
+ },
608
+ {
609
+ "epoch": 13.91304347826087,
610
+ "grad_norm": 1.5113251209259033,
611
+ "learning_rate": 1.6908212560386476e-05,
612
+ "loss": 0.0292,
613
+ "step": 320
614
+ },
615
+ {
616
+ "epoch": 14.0,
617
+ "eval_accuracy": 0.9969230769230769,
618
+ "eval_f1": 0.9944134078212291,
619
+ "eval_loss": 0.013215974904596806,
620
+ "eval_precision": 0.9888888888888889,
621
+ "eval_recall": 1.0,
622
+ "eval_runtime": 0.6382,
623
+ "eval_samples_per_second": 509.263,
624
+ "eval_steps_per_second": 17.237,
625
+ "step": 322
626
+ },
627
+ {
628
+ "epoch": 14.130434782608695,
629
+ "grad_norm": 0.16032224893569946,
630
+ "learning_rate": 1.630434782608696e-05,
631
+ "loss": 0.0056,
632
+ "step": 325
633
+ },
634
+ {
635
+ "epoch": 14.347826086956522,
636
+ "grad_norm": 1.7882447242736816,
637
+ "learning_rate": 1.570048309178744e-05,
638
+ "loss": 0.0203,
639
+ "step": 330
640
+ },
641
+ {
642
+ "epoch": 14.565217391304348,
643
+ "grad_norm": 1.206709861755371,
644
+ "learning_rate": 1.5096618357487924e-05,
645
+ "loss": 0.0205,
646
+ "step": 335
647
+ },
648
+ {
649
+ "epoch": 14.782608695652174,
650
+ "grad_norm": 2.7993412017822266,
651
+ "learning_rate": 1.4492753623188407e-05,
652
+ "loss": 0.0227,
653
+ "step": 340
654
+ },
655
+ {
656
+ "epoch": 15.0,
657
+ "grad_norm": 1.1518446207046509,
658
+ "learning_rate": 1.388888888888889e-05,
659
+ "loss": 0.0064,
660
+ "step": 345
661
+ },
662
+ {
663
+ "epoch": 15.0,
664
+ "eval_accuracy": 0.9907692307692307,
665
+ "eval_f1": 0.9834254143646408,
666
+ "eval_loss": 0.02308599278330803,
667
+ "eval_precision": 0.967391304347826,
668
+ "eval_recall": 1.0,
669
+ "eval_runtime": 0.6311,
670
+ "eval_samples_per_second": 514.958,
671
+ "eval_steps_per_second": 17.429,
672
+ "step": 345
673
+ },
674
+ {
675
+ "epoch": 15.217391304347826,
676
+ "grad_norm": 1.4991214275360107,
677
+ "learning_rate": 1.3285024154589374e-05,
678
+ "loss": 0.0152,
679
+ "step": 350
680
+ },
681
+ {
682
+ "epoch": 15.434782608695652,
683
+ "grad_norm": 1.1912800073623657,
684
+ "learning_rate": 1.2681159420289857e-05,
685
+ "loss": 0.0066,
686
+ "step": 355
687
+ },
688
+ {
689
+ "epoch": 15.652173913043478,
690
+ "grad_norm": 0.4402332603931427,
691
+ "learning_rate": 1.2077294685990338e-05,
692
+ "loss": 0.0051,
693
+ "step": 360
694
+ },
695
+ {
696
+ "epoch": 15.869565217391305,
697
+ "grad_norm": 0.8945227861404419,
698
+ "learning_rate": 1.147342995169082e-05,
699
+ "loss": 0.02,
700
+ "step": 365
701
+ },
702
+ {
703
+ "epoch": 16.0,
704
+ "eval_accuracy": 0.9969230769230769,
705
+ "eval_f1": 0.9944134078212291,
706
+ "eval_loss": 0.008727076463401318,
707
+ "eval_precision": 0.9888888888888889,
708
+ "eval_recall": 1.0,
709
+ "eval_runtime": 0.6362,
710
+ "eval_samples_per_second": 510.858,
711
+ "eval_steps_per_second": 17.291,
712
+ "step": 368
713
+ },
714
+ {
715
+ "epoch": 16.08695652173913,
716
+ "grad_norm": 1.353935718536377,
717
+ "learning_rate": 1.0869565217391305e-05,
718
+ "loss": 0.0422,
719
+ "step": 370
720
+ },
721
+ {
722
+ "epoch": 16.304347826086957,
723
+ "grad_norm": 2.2760603427886963,
724
+ "learning_rate": 1.0265700483091788e-05,
725
+ "loss": 0.0314,
726
+ "step": 375
727
+ },
728
+ {
729
+ "epoch": 16.52173913043478,
730
+ "grad_norm": 1.7415324449539185,
731
+ "learning_rate": 9.66183574879227e-06,
732
+ "loss": 0.0105,
733
+ "step": 380
734
+ },
735
+ {
736
+ "epoch": 16.73913043478261,
737
+ "grad_norm": 0.31125152111053467,
738
+ "learning_rate": 9.057971014492753e-06,
739
+ "loss": 0.0249,
740
+ "step": 385
741
+ },
742
+ {
743
+ "epoch": 16.956521739130434,
744
+ "grad_norm": 3.789961814880371,
745
+ "learning_rate": 8.454106280193238e-06,
746
+ "loss": 0.0356,
747
+ "step": 390
748
+ },
749
+ {
750
+ "epoch": 17.0,
751
+ "eval_accuracy": 0.9969230769230769,
752
+ "eval_f1": 0.9944134078212291,
753
+ "eval_loss": 0.011419730260968208,
754
+ "eval_precision": 0.9888888888888889,
755
+ "eval_recall": 1.0,
756
+ "eval_runtime": 0.6273,
757
+ "eval_samples_per_second": 518.129,
758
+ "eval_steps_per_second": 17.537,
759
+ "step": 391
760
+ },
761
+ {
762
+ "epoch": 17.17391304347826,
763
+ "grad_norm": 0.2875424027442932,
764
+ "learning_rate": 7.85024154589372e-06,
765
+ "loss": 0.0102,
766
+ "step": 395
767
+ },
768
+ {
769
+ "epoch": 17.391304347826086,
770
+ "grad_norm": 1.2512189149856567,
771
+ "learning_rate": 7.246376811594203e-06,
772
+ "loss": 0.0114,
773
+ "step": 400
774
+ },
775
+ {
776
+ "epoch": 17.608695652173914,
777
+ "grad_norm": 2.634803295135498,
778
+ "learning_rate": 6.642512077294687e-06,
779
+ "loss": 0.0274,
780
+ "step": 405
781
+ },
782
+ {
783
+ "epoch": 17.82608695652174,
784
+ "grad_norm": 1.524092674255371,
785
+ "learning_rate": 6.038647342995169e-06,
786
+ "loss": 0.0232,
787
+ "step": 410
788
+ },
789
+ {
790
+ "epoch": 18.0,
791
+ "eval_accuracy": 1.0,
792
+ "eval_f1": 1.0,
793
+ "eval_loss": 0.00724475271999836,
794
+ "eval_precision": 1.0,
795
+ "eval_recall": 1.0,
796
+ "eval_runtime": 0.6289,
797
+ "eval_samples_per_second": 516.744,
798
+ "eval_steps_per_second": 17.49,
799
+ "step": 414
800
+ },
801
+ {
802
+ "epoch": 18.043478260869566,
803
+ "grad_norm": 0.8228383660316467,
804
+ "learning_rate": 5.4347826086956525e-06,
805
+ "loss": 0.0187,
806
+ "step": 415
807
+ },
808
+ {
809
+ "epoch": 18.26086956521739,
810
+ "grad_norm": 0.26993948221206665,
811
+ "learning_rate": 4.830917874396135e-06,
812
+ "loss": 0.0176,
813
+ "step": 420
814
+ },
815
+ {
816
+ "epoch": 18.47826086956522,
817
+ "grad_norm": 1.4451013803482056,
818
+ "learning_rate": 4.227053140096619e-06,
819
+ "loss": 0.017,
820
+ "step": 425
821
+ },
822
+ {
823
+ "epoch": 18.695652173913043,
824
+ "grad_norm": 1.0776654481887817,
825
+ "learning_rate": 3.6231884057971017e-06,
826
+ "loss": 0.0225,
827
+ "step": 430
828
+ },
829
+ {
830
+ "epoch": 18.91304347826087,
831
+ "grad_norm": 0.6803100109100342,
832
+ "learning_rate": 3.0193236714975845e-06,
833
+ "loss": 0.0351,
834
+ "step": 435
835
+ },
836
+ {
837
+ "epoch": 19.0,
838
+ "eval_accuracy": 0.9969230769230769,
839
+ "eval_f1": 0.9944134078212291,
840
+ "eval_loss": 0.008730148896574974,
841
+ "eval_precision": 0.9888888888888889,
842
+ "eval_recall": 1.0,
843
+ "eval_runtime": 0.629,
844
+ "eval_samples_per_second": 516.656,
845
+ "eval_steps_per_second": 17.487,
846
+ "step": 437
847
+ },
848
+ {
849
+ "epoch": 19.130434782608695,
850
+ "grad_norm": 3.2962541580200195,
851
+ "learning_rate": 2.4154589371980677e-06,
852
+ "loss": 0.0138,
853
+ "step": 440
854
+ },
855
+ {
856
+ "epoch": 19.347826086956523,
857
+ "grad_norm": 2.5289676189422607,
858
+ "learning_rate": 1.8115942028985508e-06,
859
+ "loss": 0.0192,
860
+ "step": 445
861
+ },
862
+ {
863
+ "epoch": 19.565217391304348,
864
+ "grad_norm": 0.09774911403656006,
865
+ "learning_rate": 1.2077294685990338e-06,
866
+ "loss": 0.0136,
867
+ "step": 450
868
+ },
869
+ {
870
+ "epoch": 19.782608695652176,
871
+ "grad_norm": 0.9112738370895386,
872
+ "learning_rate": 6.038647342995169e-07,
873
+ "loss": 0.0213,
874
+ "step": 455
875
+ },
876
+ {
877
+ "epoch": 20.0,
878
+ "grad_norm": 0.6033933162689209,
879
+ "learning_rate": 0.0,
880
+ "loss": 0.0155,
881
+ "step": 460
882
+ },
883
+ {
884
+ "epoch": 20.0,
885
+ "eval_accuracy": 0.9969230769230769,
886
+ "eval_f1": 0.9944134078212291,
887
+ "eval_loss": 0.007528003770858049,
888
+ "eval_precision": 0.9888888888888889,
889
+ "eval_recall": 1.0,
890
+ "eval_runtime": 0.6414,
891
+ "eval_samples_per_second": 506.707,
892
+ "eval_steps_per_second": 17.15,
893
+ "step": 460
894
+ },
895
+ {
896
+ "epoch": 20.0,
897
+ "step": 460,
898
+ "total_flos": 1.4530811161131418e+18,
899
+ "train_loss": 0.0580909106111073,
900
+ "train_runtime": 291.8902,
901
+ "train_samples_per_second": 200.281,
902
+ "train_steps_per_second": 1.576
903
+ }
904
+ ],
905
+ "logging_steps": 5,
906
+ "max_steps": 460,
907
+ "num_input_tokens_seen": 0,
908
+ "num_train_epochs": 20,
909
+ "save_steps": 500,
910
+ "stateful_callbacks": {
911
+ "TrainerControl": {
912
+ "args": {
913
+ "should_epoch_stop": false,
914
+ "should_evaluate": false,
915
+ "should_log": false,
916
+ "should_save": true,
917
+ "should_training_stop": true
918
+ },
919
+ "attributes": {}
920
+ }
921
+ },
922
+ "total_flos": 1.4530811161131418e+18,
923
+ "train_batch_size": 32,
924
+ "trial_name": null,
925
+ "trial_params": null
926
+ }