samadriaz committed on
Commit
4264b4f
1 Parent(s): 53992b3

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +7 -7
  2. test_results.json +7 -7
  3. trainer_state.json +442 -516
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
- {
2
- "epoch": 3.25,
3
- "eval_accuracy": 0.9290322580645162,
4
- "eval_loss": 0.3026686906814575,
5
- "eval_runtime": 47.1322,
6
- "eval_samples_per_second": 3.289,
7
- "eval_steps_per_second": 1.655
8
  }
 
1
+ {
2
+ "epoch": 3.25,
3
+ "eval_accuracy": 0.9419354838709677,
4
+ "eval_loss": 0.1840752214193344,
5
+ "eval_runtime": 33.5289,
6
+ "eval_samples_per_second": 4.623,
7
+ "eval_steps_per_second": 2.326
8
  }
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
- {
2
- "epoch": 3.25,
3
- "eval_accuracy": 0.9290322580645162,
4
- "eval_loss": 0.3026686906814575,
5
- "eval_runtime": 47.1322,
6
- "eval_samples_per_second": 3.289,
7
- "eval_steps_per_second": 1.655
8
  }
 
1
+ {
2
+ "epoch": 3.25,
3
+ "eval_accuracy": 0.9419354838709677,
4
+ "eval_loss": 0.1840752214193344,
5
+ "eval_runtime": 33.5289,
6
+ "eval_samples_per_second": 4.623,
7
+ "eval_steps_per_second": 2.326
8
  }
trainer_state.json CHANGED
@@ -1,516 +1,442 @@
1
- {
2
- "best_metric": 0.9714285714285714,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-600",
4
- "epoch": 3.25,
5
- "eval_steps": 500,
6
- "global_step": 600,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.016666666666666666,
13
- "grad_norm": 13.758747100830078,
14
- "learning_rate": 8.333333333333334e-06,
15
- "loss": 2.4559,
16
- "step": 10
17
- },
18
- {
19
- "epoch": 0.03333333333333333,
20
- "grad_norm": 12.61396312713623,
21
- "learning_rate": 1.6666666666666667e-05,
22
- "loss": 2.3645,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.05,
27
- "grad_norm": 14.183613777160645,
28
- "learning_rate": 2.5e-05,
29
- "loss": 2.3001,
30
- "step": 30
31
- },
32
- {
33
- "epoch": 0.06666666666666667,
34
- "grad_norm": 14.415635108947754,
35
- "learning_rate": 3.3333333333333335e-05,
36
- "loss": 2.2341,
37
- "step": 40
38
- },
39
- {
40
- "epoch": 0.08333333333333333,
41
- "grad_norm": 16.781152725219727,
42
- "learning_rate": 4.166666666666667e-05,
43
- "loss": 2.0842,
44
- "step": 50
45
- },
46
- {
47
- "epoch": 0.1,
48
- "grad_norm": 14.559596061706543,
49
- "learning_rate": 5e-05,
50
- "loss": 2.1504,
51
- "step": 60
52
- },
53
- {
54
- "epoch": 0.11666666666666667,
55
- "grad_norm": 17.537364959716797,
56
- "learning_rate": 4.9074074074074075e-05,
57
- "loss": 2.2161,
58
- "step": 70
59
- },
60
- {
61
- "epoch": 0.13333333333333333,
62
- "grad_norm": 21.04912567138672,
63
- "learning_rate": 4.814814814814815e-05,
64
- "loss": 1.8075,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.15,
69
- "grad_norm": 16.51645851135254,
70
- "learning_rate": 4.722222222222222e-05,
71
- "loss": 1.9279,
72
- "step": 90
73
- },
74
- {
75
- "epoch": 0.16666666666666666,
76
- "grad_norm": 22.05646324157715,
77
- "learning_rate": 4.62962962962963e-05,
78
- "loss": 1.7128,
79
- "step": 100
80
- },
81
- {
82
- "epoch": 0.18333333333333332,
83
- "grad_norm": 16.731571197509766,
84
- "learning_rate": 4.5370370370370374e-05,
85
- "loss": 1.9934,
86
- "step": 110
87
- },
88
- {
89
- "epoch": 0.2,
90
- "grad_norm": 11.04407024383545,
91
- "learning_rate": 4.4444444444444447e-05,
92
- "loss": 1.5547,
93
- "step": 120
94
- },
95
- {
96
- "epoch": 0.21666666666666667,
97
- "grad_norm": 21.608762741088867,
98
- "learning_rate": 4.351851851851852e-05,
99
- "loss": 1.6177,
100
- "step": 130
101
- },
102
- {
103
- "epoch": 0.23333333333333334,
104
- "grad_norm": 19.684383392333984,
105
- "learning_rate": 4.259259259259259e-05,
106
- "loss": 1.4555,
107
- "step": 140
108
- },
109
- {
110
- "epoch": 0.25,
111
- "grad_norm": 20.806903839111328,
112
- "learning_rate": 4.166666666666667e-05,
113
- "loss": 1.3081,
114
- "step": 150
115
- },
116
- {
117
- "epoch": 0.25,
118
- "eval_accuracy": 0.7142857142857143,
119
- "eval_loss": 1.0638517141342163,
120
- "eval_runtime": 25.4265,
121
- "eval_samples_per_second": 2.753,
122
- "eval_steps_per_second": 1.377,
123
- "step": 150
124
- },
125
- {
126
- "epoch": 1.0166666666666666,
127
- "grad_norm": 17.35706901550293,
128
- "learning_rate": 4.074074074074074e-05,
129
- "loss": 0.8822,
130
- "step": 160
131
- },
132
- {
133
- "epoch": 1.0333333333333334,
134
- "grad_norm": 19.001983642578125,
135
- "learning_rate": 3.981481481481482e-05,
136
- "loss": 1.138,
137
- "step": 170
138
- },
139
- {
140
- "epoch": 1.05,
141
- "grad_norm": 12.834399223327637,
142
- "learning_rate": 3.888888888888889e-05,
143
- "loss": 1.1393,
144
- "step": 180
145
- },
146
- {
147
- "epoch": 1.0666666666666667,
148
- "grad_norm": 6.843353271484375,
149
- "learning_rate": 3.7962962962962964e-05,
150
- "loss": 0.808,
151
- "step": 190
152
- },
153
- {
154
- "epoch": 1.0833333333333333,
155
- "grad_norm": 36.57607650756836,
156
- "learning_rate": 3.7037037037037037e-05,
157
- "loss": 0.8905,
158
- "step": 200
159
- },
160
- {
161
- "epoch": 1.1,
162
- "grad_norm": 13.027900695800781,
163
- "learning_rate": 3.611111111111111e-05,
164
- "loss": 0.7259,
165
- "step": 210
166
- },
167
- {
168
- "epoch": 1.1166666666666667,
169
- "grad_norm": 8.811443328857422,
170
- "learning_rate": 3.518518518518519e-05,
171
- "loss": 0.9753,
172
- "step": 220
173
- },
174
- {
175
- "epoch": 1.1333333333333333,
176
- "grad_norm": 10.535086631774902,
177
- "learning_rate": 3.425925925925926e-05,
178
- "loss": 0.9016,
179
- "step": 230
180
- },
181
- {
182
- "epoch": 1.15,
183
- "grad_norm": 11.875638961791992,
184
- "learning_rate": 3.3333333333333335e-05,
185
- "loss": 0.6332,
186
- "step": 240
187
- },
188
- {
189
- "epoch": 1.1666666666666667,
190
- "grad_norm": 26.462709426879883,
191
- "learning_rate": 3.240740740740741e-05,
192
- "loss": 0.6244,
193
- "step": 250
194
- },
195
- {
196
- "epoch": 1.1833333333333333,
197
- "grad_norm": 10.813070297241211,
198
- "learning_rate": 3.148148148148148e-05,
199
- "loss": 0.4792,
200
- "step": 260
201
- },
202
- {
203
- "epoch": 1.2,
204
- "grad_norm": 78.73821258544922,
205
- "learning_rate": 3.055555555555556e-05,
206
- "loss": 0.6063,
207
- "step": 270
208
- },
209
- {
210
- "epoch": 1.2166666666666668,
211
- "grad_norm": 10.075183868408203,
212
- "learning_rate": 2.962962962962963e-05,
213
- "loss": 0.1947,
214
- "step": 280
215
- },
216
- {
217
- "epoch": 1.2333333333333334,
218
- "grad_norm": 19.350034713745117,
219
- "learning_rate": 2.8703703703703706e-05,
220
- "loss": 1.0975,
221
- "step": 290
222
- },
223
- {
224
- "epoch": 1.25,
225
- "grad_norm": 2.7459921836853027,
226
- "learning_rate": 2.777777777777778e-05,
227
- "loss": 0.8783,
228
- "step": 300
229
- },
230
- {
231
- "epoch": 1.25,
232
- "eval_accuracy": 0.7142857142857143,
233
- "eval_loss": 0.6895671486854553,
234
- "eval_runtime": 20.4608,
235
- "eval_samples_per_second": 3.421,
236
- "eval_steps_per_second": 1.711,
237
- "step": 300
238
- },
239
- {
240
- "epoch": 2.0166666666666666,
241
- "grad_norm": 4.0030622482299805,
242
- "learning_rate": 2.6851851851851855e-05,
243
- "loss": 0.3401,
244
- "step": 310
245
- },
246
- {
247
- "epoch": 2.033333333333333,
248
- "grad_norm": 0.7160613536834717,
249
- "learning_rate": 2.5925925925925925e-05,
250
- "loss": 0.2488,
251
- "step": 320
252
- },
253
- {
254
- "epoch": 2.05,
255
- "grad_norm": 10.729706764221191,
256
- "learning_rate": 2.5e-05,
257
- "loss": 0.3801,
258
- "step": 330
259
- },
260
- {
261
- "epoch": 2.066666666666667,
262
- "grad_norm": 16.794353485107422,
263
- "learning_rate": 2.4074074074074074e-05,
264
- "loss": 0.1913,
265
- "step": 340
266
- },
267
- {
268
- "epoch": 2.0833333333333335,
269
- "grad_norm": 4.773400783538818,
270
- "learning_rate": 2.314814814814815e-05,
271
- "loss": 0.2786,
272
- "step": 350
273
- },
274
- {
275
- "epoch": 2.1,
276
- "grad_norm": 6.437568187713623,
277
- "learning_rate": 2.2222222222222223e-05,
278
- "loss": 0.3877,
279
- "step": 360
280
- },
281
- {
282
- "epoch": 2.1166666666666667,
283
- "grad_norm": 0.9648570418357849,
284
- "learning_rate": 2.1296296296296296e-05,
285
- "loss": 0.6653,
286
- "step": 370
287
- },
288
- {
289
- "epoch": 2.1333333333333333,
290
- "grad_norm": 0.9003952741622925,
291
- "learning_rate": 2.037037037037037e-05,
292
- "loss": 0.1758,
293
- "step": 380
294
- },
295
- {
296
- "epoch": 2.15,
297
- "grad_norm": 0.12388920783996582,
298
- "learning_rate": 1.9444444444444445e-05,
299
- "loss": 0.2687,
300
- "step": 390
301
- },
302
- {
303
- "epoch": 2.1666666666666665,
304
- "grad_norm": 1.9126410484313965,
305
- "learning_rate": 1.8518518518518518e-05,
306
- "loss": 0.4036,
307
- "step": 400
308
- },
309
- {
310
- "epoch": 2.183333333333333,
311
- "grad_norm": 7.584377288818359,
312
- "learning_rate": 1.7592592592592595e-05,
313
- "loss": 0.3556,
314
- "step": 410
315
- },
316
- {
317
- "epoch": 2.2,
318
- "grad_norm": 92.24506378173828,
319
- "learning_rate": 1.6666666666666667e-05,
320
- "loss": 0.2891,
321
- "step": 420
322
- },
323
- {
324
- "epoch": 2.216666666666667,
325
- "grad_norm": 5.879292964935303,
326
- "learning_rate": 1.574074074074074e-05,
327
- "loss": 0.5045,
328
- "step": 430
329
- },
330
- {
331
- "epoch": 2.2333333333333334,
332
- "grad_norm": 0.2639165222644806,
333
- "learning_rate": 1.4814814814814815e-05,
334
- "loss": 0.1212,
335
- "step": 440
336
- },
337
- {
338
- "epoch": 2.25,
339
- "grad_norm": 44.12073516845703,
340
- "learning_rate": 1.388888888888889e-05,
341
- "loss": 0.1599,
342
- "step": 450
343
- },
344
- {
345
- "epoch": 2.25,
346
- "eval_accuracy": 0.8,
347
- "eval_loss": 0.635800838470459,
348
- "eval_runtime": 20.6071,
349
- "eval_samples_per_second": 3.397,
350
- "eval_steps_per_second": 1.698,
351
- "step": 450
352
- },
353
- {
354
- "epoch": 3.0166666666666666,
355
- "grad_norm": 0.2862427532672882,
356
- "learning_rate": 1.2962962962962962e-05,
357
- "loss": 0.1759,
358
- "step": 460
359
- },
360
- {
361
- "epoch": 3.033333333333333,
362
- "grad_norm": 2.758387804031372,
363
- "learning_rate": 1.2037037037037037e-05,
364
- "loss": 0.0704,
365
- "step": 470
366
- },
367
- {
368
- "epoch": 3.05,
369
- "grad_norm": 0.12285558879375458,
370
- "learning_rate": 1.1111111111111112e-05,
371
- "loss": 0.0226,
372
- "step": 480
373
- },
374
- {
375
- "epoch": 3.066666666666667,
376
- "grad_norm": 0.1087489128112793,
377
- "learning_rate": 1.0185185185185185e-05,
378
- "loss": 0.0404,
379
- "step": 490
380
- },
381
- {
382
- "epoch": 3.0833333333333335,
383
- "grad_norm": 0.12313449382781982,
384
- "learning_rate": 9.259259259259259e-06,
385
- "loss": 0.0202,
386
- "step": 500
387
- },
388
- {
389
- "epoch": 3.1,
390
- "grad_norm": 32.441524505615234,
391
- "learning_rate": 8.333333333333334e-06,
392
- "loss": 0.333,
393
- "step": 510
394
- },
395
- {
396
- "epoch": 3.1166666666666667,
397
- "grad_norm": 0.08568418771028519,
398
- "learning_rate": 7.4074074074074075e-06,
399
- "loss": 0.1238,
400
- "step": 520
401
- },
402
- {
403
- "epoch": 3.1333333333333333,
404
- "grad_norm": 3.2110307216644287,
405
- "learning_rate": 6.481481481481481e-06,
406
- "loss": 0.0516,
407
- "step": 530
408
- },
409
- {
410
- "epoch": 3.15,
411
- "grad_norm": 0.43686577677726746,
412
- "learning_rate": 5.555555555555556e-06,
413
- "loss": 0.0699,
414
- "step": 540
415
- },
416
- {
417
- "epoch": 3.1666666666666665,
418
- "grad_norm": 3.2595551013946533,
419
- "learning_rate": 4.6296296296296296e-06,
420
- "loss": 0.2713,
421
- "step": 550
422
- },
423
- {
424
- "epoch": 3.183333333333333,
425
- "grad_norm": 0.06874556094408035,
426
- "learning_rate": 3.7037037037037037e-06,
427
- "loss": 0.1937,
428
- "step": 560
429
- },
430
- {
431
- "epoch": 3.2,
432
- "grad_norm": 0.22142885625362396,
433
- "learning_rate": 2.777777777777778e-06,
434
- "loss": 0.1291,
435
- "step": 570
436
- },
437
- {
438
- "epoch": 3.216666666666667,
439
- "grad_norm": 0.7484509944915771,
440
- "learning_rate": 1.8518518518518519e-06,
441
- "loss": 0.1577,
442
- "step": 580
443
- },
444
- {
445
- "epoch": 3.2333333333333334,
446
- "grad_norm": 0.09250544011592865,
447
- "learning_rate": 9.259259259259259e-07,
448
- "loss": 0.3134,
449
- "step": 590
450
- },
451
- {
452
- "epoch": 3.25,
453
- "grad_norm": 0.095377117395401,
454
- "learning_rate": 0.0,
455
- "loss": 0.4586,
456
- "step": 600
457
- },
458
- {
459
- "epoch": 3.25,
460
- "eval_accuracy": 0.9714285714285714,
461
- "eval_loss": 0.140297994017601,
462
- "eval_runtime": 21.4607,
463
- "eval_samples_per_second": 3.262,
464
- "eval_steps_per_second": 1.631,
465
- "step": 600
466
- },
467
- {
468
- "epoch": 3.25,
469
- "step": 600,
470
- "total_flos": 1.495384188125184e+18,
471
- "train_loss": 0.805992950797081,
472
- "train_runtime": 890.3074,
473
- "train_samples_per_second": 1.348,
474
- "train_steps_per_second": 0.674
475
- },
476
- {
477
- "epoch": 3.25,
478
- "eval_accuracy": 0.9290322580645162,
479
- "eval_loss": 0.30266863107681274,
480
- "eval_runtime": 45.6116,
481
- "eval_samples_per_second": 3.398,
482
- "eval_steps_per_second": 1.71,
483
- "step": 600
484
- },
485
- {
486
- "epoch": 3.25,
487
- "eval_accuracy": 0.9290322580645162,
488
- "eval_loss": 0.3026686906814575,
489
- "eval_runtime": 47.1322,
490
- "eval_samples_per_second": 3.289,
491
- "eval_steps_per_second": 1.655,
492
- "step": 600
493
- }
494
- ],
495
- "logging_steps": 10,
496
- "max_steps": 600,
497
- "num_input_tokens_seen": 0,
498
- "num_train_epochs": 9223372036854775807,
499
- "save_steps": 500,
500
- "stateful_callbacks": {
501
- "TrainerControl": {
502
- "args": {
503
- "should_epoch_stop": false,
504
- "should_evaluate": false,
505
- "should_log": false,
506
- "should_save": true,
507
- "should_training_stop": true
508
- },
509
- "attributes": {}
510
- }
511
- },
512
- "total_flos": 1.495384188125184e+18,
513
- "train_batch_size": 2,
514
- "trial_name": null,
515
- "trial_params": null
516
- }
 
1
+ {
2
+ "best_metric": 0.9142857142857143,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset\\checkpoint-600",
4
+ "epoch": 3.25,
5
+ "eval_steps": 500,
6
+ "global_step": 600,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "learning_rate": 8.333333333333334e-06,
14
+ "loss": 2.3746,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.03,
19
+ "learning_rate": 1.6666666666666667e-05,
20
+ "loss": 2.2591,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.05,
25
+ "learning_rate": 2.5e-05,
26
+ "loss": 2.2664,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.07,
31
+ "learning_rate": 3.3333333333333335e-05,
32
+ "loss": 2.2028,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.08,
37
+ "learning_rate": 4.166666666666667e-05,
38
+ "loss": 2.1605,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.1,
43
+ "learning_rate": 5e-05,
44
+ "loss": 2.2635,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.12,
49
+ "learning_rate": 4.9074074074074075e-05,
50
+ "loss": 2.1405,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.13,
55
+ "learning_rate": 4.814814814814815e-05,
56
+ "loss": 1.8355,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.15,
61
+ "learning_rate": 4.722222222222222e-05,
62
+ "loss": 1.863,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.17,
67
+ "learning_rate": 4.62962962962963e-05,
68
+ "loss": 1.7627,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.18,
73
+ "learning_rate": 4.5370370370370374e-05,
74
+ "loss": 1.6975,
75
+ "step": 110
76
+ },
77
+ {
78
+ "epoch": 0.2,
79
+ "learning_rate": 4.4444444444444447e-05,
80
+ "loss": 1.6183,
81
+ "step": 120
82
+ },
83
+ {
84
+ "epoch": 0.22,
85
+ "learning_rate": 4.351851851851852e-05,
86
+ "loss": 1.4872,
87
+ "step": 130
88
+ },
89
+ {
90
+ "epoch": 0.23,
91
+ "learning_rate": 4.259259259259259e-05,
92
+ "loss": 1.575,
93
+ "step": 140
94
+ },
95
+ {
96
+ "epoch": 0.25,
97
+ "learning_rate": 4.166666666666667e-05,
98
+ "loss": 1.3469,
99
+ "step": 150
100
+ },
101
+ {
102
+ "epoch": 0.25,
103
+ "eval_accuracy": 0.5857142857142857,
104
+ "eval_loss": 1.142739176750183,
105
+ "eval_runtime": 17.96,
106
+ "eval_samples_per_second": 3.898,
107
+ "eval_steps_per_second": 1.949,
108
+ "step": 150
109
+ },
110
+ {
111
+ "epoch": 1.02,
112
+ "learning_rate": 4.074074074074074e-05,
113
+ "loss": 1.0782,
114
+ "step": 160
115
+ },
116
+ {
117
+ "epoch": 1.03,
118
+ "learning_rate": 3.981481481481482e-05,
119
+ "loss": 1.1256,
120
+ "step": 170
121
+ },
122
+ {
123
+ "epoch": 1.05,
124
+ "learning_rate": 3.888888888888889e-05,
125
+ "loss": 1.0364,
126
+ "step": 180
127
+ },
128
+ {
129
+ "epoch": 1.07,
130
+ "learning_rate": 3.7962962962962964e-05,
131
+ "loss": 0.8776,
132
+ "step": 190
133
+ },
134
+ {
135
+ "epoch": 1.08,
136
+ "learning_rate": 3.7037037037037037e-05,
137
+ "loss": 0.8022,
138
+ "step": 200
139
+ },
140
+ {
141
+ "epoch": 1.1,
142
+ "learning_rate": 3.611111111111111e-05,
143
+ "loss": 0.6855,
144
+ "step": 210
145
+ },
146
+ {
147
+ "epoch": 1.12,
148
+ "learning_rate": 3.518518518518519e-05,
149
+ "loss": 0.6916,
150
+ "step": 220
151
+ },
152
+ {
153
+ "epoch": 1.13,
154
+ "learning_rate": 3.425925925925926e-05,
155
+ "loss": 0.9892,
156
+ "step": 230
157
+ },
158
+ {
159
+ "epoch": 1.15,
160
+ "learning_rate": 3.3333333333333335e-05,
161
+ "loss": 0.8527,
162
+ "step": 240
163
+ },
164
+ {
165
+ "epoch": 1.17,
166
+ "learning_rate": 3.240740740740741e-05,
167
+ "loss": 0.4278,
168
+ "step": 250
169
+ },
170
+ {
171
+ "epoch": 1.18,
172
+ "learning_rate": 3.148148148148148e-05,
173
+ "loss": 0.6122,
174
+ "step": 260
175
+ },
176
+ {
177
+ "epoch": 1.2,
178
+ "learning_rate": 3.055555555555556e-05,
179
+ "loss": 0.7313,
180
+ "step": 270
181
+ },
182
+ {
183
+ "epoch": 1.22,
184
+ "learning_rate": 2.962962962962963e-05,
185
+ "loss": 0.4492,
186
+ "step": 280
187
+ },
188
+ {
189
+ "epoch": 1.23,
190
+ "learning_rate": 2.8703703703703706e-05,
191
+ "loss": 0.7908,
192
+ "step": 290
193
+ },
194
+ {
195
+ "epoch": 1.25,
196
+ "learning_rate": 2.777777777777778e-05,
197
+ "loss": 0.9358,
198
+ "step": 300
199
+ },
200
+ {
201
+ "epoch": 1.25,
202
+ "eval_accuracy": 0.8,
203
+ "eval_loss": 0.5032853484153748,
204
+ "eval_runtime": 17.2748,
205
+ "eval_samples_per_second": 4.052,
206
+ "eval_steps_per_second": 2.026,
207
+ "step": 300
208
+ },
209
+ {
210
+ "epoch": 2.02,
211
+ "learning_rate": 2.6851851851851855e-05,
212
+ "loss": 0.2851,
213
+ "step": 310
214
+ },
215
+ {
216
+ "epoch": 2.03,
217
+ "learning_rate": 2.5925925925925925e-05,
218
+ "loss": 0.1723,
219
+ "step": 320
220
+ },
221
+ {
222
+ "epoch": 2.05,
223
+ "learning_rate": 2.5e-05,
224
+ "loss": 0.3936,
225
+ "step": 330
226
+ },
227
+ {
228
+ "epoch": 2.07,
229
+ "learning_rate": 2.4074074074074074e-05,
230
+ "loss": 0.257,
231
+ "step": 340
232
+ },
233
+ {
234
+ "epoch": 2.08,
235
+ "learning_rate": 2.314814814814815e-05,
236
+ "loss": 0.4567,
237
+ "step": 350
238
+ },
239
+ {
240
+ "epoch": 2.1,
241
+ "learning_rate": 2.2222222222222223e-05,
242
+ "loss": 0.3053,
243
+ "step": 360
244
+ },
245
+ {
246
+ "epoch": 2.12,
247
+ "learning_rate": 2.1296296296296296e-05,
248
+ "loss": 0.4902,
249
+ "step": 370
250
+ },
251
+ {
252
+ "epoch": 2.13,
253
+ "learning_rate": 2.037037037037037e-05,
254
+ "loss": 0.1633,
255
+ "step": 380
256
+ },
257
+ {
258
+ "epoch": 2.15,
259
+ "learning_rate": 1.9444444444444445e-05,
260
+ "loss": 0.1947,
261
+ "step": 390
262
+ },
263
+ {
264
+ "epoch": 2.17,
265
+ "learning_rate": 1.8518518518518518e-05,
266
+ "loss": 0.2165,
267
+ "step": 400
268
+ },
269
+ {
270
+ "epoch": 2.18,
271
+ "learning_rate": 1.7592592592592595e-05,
272
+ "loss": 0.0436,
273
+ "step": 410
274
+ },
275
+ {
276
+ "epoch": 2.2,
277
+ "learning_rate": 1.6666666666666667e-05,
278
+ "loss": 0.1382,
279
+ "step": 420
280
+ },
281
+ {
282
+ "epoch": 2.22,
283
+ "learning_rate": 1.574074074074074e-05,
284
+ "loss": 0.4627,
285
+ "step": 430
286
+ },
287
+ {
288
+ "epoch": 2.23,
289
+ "learning_rate": 1.4814814814814815e-05,
290
+ "loss": 0.0275,
291
+ "step": 440
292
+ },
293
+ {
294
+ "epoch": 2.25,
295
+ "learning_rate": 1.388888888888889e-05,
296
+ "loss": 0.2002,
297
+ "step": 450
298
+ },
299
+ {
300
+ "epoch": 2.25,
301
+ "eval_accuracy": 0.8285714285714286,
302
+ "eval_loss": 0.45540377497673035,
303
+ "eval_runtime": 17.6936,
304
+ "eval_samples_per_second": 3.956,
305
+ "eval_steps_per_second": 1.978,
306
+ "step": 450
307
+ },
308
+ {
309
+ "epoch": 3.02,
310
+ "learning_rate": 1.2962962962962962e-05,
311
+ "loss": 0.07,
312
+ "step": 460
313
+ },
314
+ {
315
+ "epoch": 3.03,
316
+ "learning_rate": 1.2037037037037037e-05,
317
+ "loss": 0.083,
318
+ "step": 470
319
+ },
320
+ {
321
+ "epoch": 3.05,
322
+ "learning_rate": 1.1111111111111112e-05,
323
+ "loss": 0.0272,
324
+ "step": 480
325
+ },
326
+ {
327
+ "epoch": 3.07,
328
+ "learning_rate": 1.0185185185185185e-05,
329
+ "loss": 0.0161,
330
+ "step": 490
331
+ },
332
+ {
333
+ "epoch": 3.08,
334
+ "learning_rate": 9.259259259259259e-06,
335
+ "loss": 0.0257,
336
+ "step": 500
337
+ },
338
+ {
339
+ "epoch": 3.1,
340
+ "learning_rate": 8.333333333333334e-06,
341
+ "loss": 0.0935,
342
+ "step": 510
343
+ },
344
+ {
345
+ "epoch": 3.12,
346
+ "learning_rate": 7.4074074074074075e-06,
347
+ "loss": 0.1192,
348
+ "step": 520
349
+ },
350
+ {
351
+ "epoch": 3.13,
352
+ "learning_rate": 6.481481481481481e-06,
353
+ "loss": 0.0363,
354
+ "step": 530
355
+ },
356
+ {
357
+ "epoch": 3.15,
358
+ "learning_rate": 5.555555555555556e-06,
359
+ "loss": 0.0212,
360
+ "step": 540
361
+ },
362
+ {
363
+ "epoch": 3.17,
364
+ "learning_rate": 4.6296296296296296e-06,
365
+ "loss": 0.2358,
366
+ "step": 550
367
+ },
368
+ {
369
+ "epoch": 3.18,
370
+ "learning_rate": 3.7037037037037037e-06,
371
+ "loss": 0.208,
372
+ "step": 560
373
+ },
374
+ {
375
+ "epoch": 3.2,
376
+ "learning_rate": 2.777777777777778e-06,
377
+ "loss": 0.2425,
378
+ "step": 570
379
+ },
380
+ {
381
+ "epoch": 3.22,
382
+ "learning_rate": 1.8518518518518519e-06,
383
+ "loss": 0.1185,
384
+ "step": 580
385
+ },
386
+ {
387
+ "epoch": 3.23,
388
+ "learning_rate": 9.259259259259259e-07,
389
+ "loss": 0.4055,
390
+ "step": 590
391
+ },
392
+ {
393
+ "epoch": 3.25,
394
+ "learning_rate": 0.0,
395
+ "loss": 0.4202,
396
+ "step": 600
397
+ },
398
+ {
399
+ "epoch": 3.25,
400
+ "eval_accuracy": 0.9142857142857143,
401
+ "eval_loss": 0.19511641561985016,
402
+ "eval_runtime": 17.2593,
403
+ "eval_samples_per_second": 4.056,
404
+ "eval_steps_per_second": 2.028,
405
+ "step": 600
406
+ },
407
+ {
408
+ "epoch": 3.25,
409
+ "step": 600,
410
+ "total_flos": 1.495384188125184e+18,
411
+ "train_loss": 0.7811511262754599,
412
+ "train_runtime": 645.0345,
413
+ "train_samples_per_second": 1.86,
414
+ "train_steps_per_second": 0.93
415
+ },
416
+ {
417
+ "epoch": 3.25,
418
+ "eval_accuracy": 0.9419354838709677,
419
+ "eval_loss": 0.1840752363204956,
420
+ "eval_runtime": 40.073,
421
+ "eval_samples_per_second": 3.868,
422
+ "eval_steps_per_second": 1.946,
423
+ "step": 600
424
+ },
425
+ {
426
+ "epoch": 3.25,
427
+ "eval_accuracy": 0.9419354838709677,
428
+ "eval_loss": 0.1840752214193344,
429
+ "eval_runtime": 33.5289,
430
+ "eval_samples_per_second": 4.623,
431
+ "eval_steps_per_second": 2.326,
432
+ "step": 600
433
+ }
434
+ ],
435
+ "logging_steps": 10,
436
+ "max_steps": 600,
437
+ "num_train_epochs": 9223372036854775807,
438
+ "save_steps": 500,
439
+ "total_flos": 1.495384188125184e+18,
440
+ "trial_name": null,
441
+ "trial_params": null
442
+ }