Joy28 commited on
Commit
86ac860
·
verified ·
1 Parent(s): 9a561cc

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -6
  2. test_results.json +6 -6
  3. trainer_state.json +1300 -454
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 19.04,
3
- "eval_accuracy": 0.7129629629629629,
4
- "eval_loss": 0.7363542914390564,
5
- "eval_runtime": 168.5999,
6
- "eval_samples_per_second": 1.281,
7
- "eval_steps_per_second": 0.16
8
  }
 
1
  {
2
+ "epoch": 39.02,
3
+ "eval_accuracy": 0.6342592592592593,
4
+ "eval_loss": 0.8682467341423035,
5
+ "eval_runtime": 165.9338,
6
+ "eval_samples_per_second": 1.302,
7
+ "eval_steps_per_second": 0.163
8
  }
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 19.04,
3
- "eval_accuracy": 0.7129629629629629,
4
- "eval_loss": 0.7363542914390564,
5
- "eval_runtime": 168.5999,
6
- "eval_samples_per_second": 1.281,
7
- "eval_steps_per_second": 0.16
8
  }
 
1
  {
2
+ "epoch": 39.02,
3
+ "eval_accuracy": 0.6342592592592593,
4
+ "eval_loss": 0.8682467341423035,
5
+ "eval_runtime": 165.9338,
6
+ "eval_samples_per_second": 1.302,
7
+ "eval_steps_per_second": 0.163
8
  }
trainer_state.json CHANGED
@@ -1,893 +1,1739 @@
1
  {
2
- "best_metric": 0.7327188940092166,
3
- "best_model_checkpoint": "videomae-base-finetuned-subset-check10/checkpoint-896",
4
- "epoch": 19.041441441441442,
5
  "eval_steps": 500,
6
- "global_step": 1110,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.01,
13
- "learning_rate": 9.00900900900901e-07,
14
- "loss": 1.6459,
15
  "step": 10
16
  },
17
  {
18
- "epoch": 0.02,
19
- "learning_rate": 1.801801801801802e-06,
20
- "loss": 1.636,
21
  "step": 20
22
  },
23
  {
24
- "epoch": 0.03,
25
- "learning_rate": 2.702702702702703e-06,
26
- "loss": 1.6904,
27
  "step": 30
28
  },
29
  {
30
- "epoch": 0.04,
31
- "learning_rate": 3.603603603603604e-06,
32
- "loss": 1.6413,
33
  "step": 40
34
  },
35
  {
36
- "epoch": 0.05,
37
- "learning_rate": 4.504504504504505e-06,
38
- "loss": 1.6285,
39
  "step": 50
40
  },
41
  {
42
- "epoch": 0.05,
43
- "eval_accuracy": 0.3686635944700461,
44
- "eval_loss": 1.6004022359848022,
45
- "eval_runtime": 173.9244,
46
- "eval_samples_per_second": 1.248,
47
- "eval_steps_per_second": 0.161,
48
  "step": 56
49
  },
50
  {
51
  "epoch": 1.0,
52
- "learning_rate": 5.405405405405406e-06,
53
- "loss": 1.63,
54
  "step": 60
55
  },
56
  {
57
  "epoch": 1.01,
58
- "learning_rate": 6.3063063063063065e-06,
59
- "loss": 1.5959,
60
  "step": 70
61
  },
62
  {
63
- "epoch": 1.02,
64
- "learning_rate": 7.207207207207208e-06,
65
- "loss": 1.6112,
66
  "step": 80
67
  },
68
  {
69
- "epoch": 1.03,
70
- "learning_rate": 8.108108108108109e-06,
71
- "loss": 1.6242,
72
  "step": 90
73
  },
74
  {
75
- "epoch": 1.04,
76
- "learning_rate": 9.00900900900901e-06,
77
- "loss": 1.5879,
78
  "step": 100
79
  },
80
  {
81
- "epoch": 1.05,
82
- "learning_rate": 9.90990990990991e-06,
83
- "loss": 1.6056,
84
  "step": 110
85
  },
86
  {
87
- "epoch": 1.05,
88
- "eval_accuracy": 0.19815668202764977,
89
- "eval_loss": 1.5825837850570679,
90
- "eval_runtime": 167.123,
91
- "eval_samples_per_second": 1.298,
92
- "eval_steps_per_second": 0.168,
93
  "step": 112
94
  },
95
  {
96
- "epoch": 2.01,
97
- "learning_rate": 9.90990990990991e-06,
98
- "loss": 1.5988,
99
  "step": 120
100
  },
101
  {
102
- "epoch": 2.02,
103
- "learning_rate": 9.80980980980981e-06,
104
- "loss": 1.5945,
105
  "step": 130
106
  },
107
  {
108
- "epoch": 2.03,
109
- "learning_rate": 9.70970970970971e-06,
110
- "loss": 1.6349,
111
  "step": 140
112
  },
113
  {
114
- "epoch": 2.03,
115
- "learning_rate": 9.60960960960961e-06,
116
- "loss": 1.5828,
117
  "step": 150
118
  },
119
  {
120
- "epoch": 2.04,
121
- "learning_rate": 9.50950950950951e-06,
122
- "loss": 1.5254,
123
  "step": 160
124
  },
125
  {
126
- "epoch": 2.05,
127
- "eval_accuracy": 0.14746543778801843,
128
- "eval_loss": 1.5978792905807495,
129
- "eval_runtime": 166.6054,
130
- "eval_samples_per_second": 1.302,
131
- "eval_steps_per_second": 0.168,
132
  "step": 168
133
  },
134
  {
135
  "epoch": 3.0,
136
- "learning_rate": 9.40940940940941e-06,
137
- "loss": 1.5397,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 3.01,
142
- "learning_rate": 9.30930930930931e-06,
143
- "loss": 1.5011,
144
  "step": 180
145
  },
146
  {
147
- "epoch": 3.02,
148
- "learning_rate": 9.20920920920921e-06,
149
- "loss": 1.4947,
150
  "step": 190
151
  },
152
  {
153
- "epoch": 3.03,
154
- "learning_rate": 9.10910910910911e-06,
155
- "loss": 1.4436,
156
  "step": 200
157
  },
158
  {
159
- "epoch": 3.04,
160
- "learning_rate": 9.00900900900901e-06,
161
- "loss": 1.3674,
162
  "step": 210
163
  },
164
  {
165
- "epoch": 3.05,
166
- "learning_rate": 8.90890890890891e-06,
167
- "loss": 1.4239,
168
  "step": 220
169
  },
170
  {
171
- "epoch": 3.05,
172
- "eval_accuracy": 0.6359447004608295,
173
- "eval_loss": 1.2076131105422974,
174
- "eval_runtime": 167.766,
175
- "eval_samples_per_second": 1.293,
176
- "eval_steps_per_second": 0.167,
177
  "step": 224
178
  },
179
  {
180
- "epoch": 4.01,
181
- "learning_rate": 8.80880880880881e-06,
182
- "loss": 1.4136,
183
  "step": 230
184
  },
185
  {
186
  "epoch": 4.01,
187
- "learning_rate": 8.70870870870871e-06,
188
- "loss": 1.3031,
189
  "step": 240
190
  },
191
  {
192
- "epoch": 4.02,
193
- "learning_rate": 8.60860860860861e-06,
194
- "loss": 1.3306,
195
  "step": 250
196
  },
197
  {
198
- "epoch": 4.03,
199
- "learning_rate": 8.50850850850851e-06,
200
- "loss": 1.2416,
201
  "step": 260
202
  },
203
  {
204
- "epoch": 4.04,
205
- "learning_rate": 8.408408408408409e-06,
206
- "loss": 1.1851,
207
  "step": 270
208
  },
209
  {
210
- "epoch": 4.05,
211
- "learning_rate": 8.308308308308309e-06,
212
- "loss": 1.1884,
213
  "step": 280
214
  },
215
  {
216
- "epoch": 4.05,
217
- "eval_accuracy": 0.5576036866359447,
218
- "eval_loss": 1.1865636110305786,
219
- "eval_runtime": 165.9475,
220
- "eval_samples_per_second": 1.308,
221
- "eval_steps_per_second": 0.169,
222
  "step": 280
223
  },
224
  {
225
- "epoch": 5.01,
226
- "learning_rate": 8.208208208208209e-06,
227
- "loss": 1.3263,
228
  "step": 290
229
  },
230
  {
231
- "epoch": 5.02,
232
- "learning_rate": 8.108108108108109e-06,
233
- "loss": 1.2252,
234
  "step": 300
235
  },
236
  {
237
- "epoch": 5.03,
238
- "learning_rate": 8.00800800800801e-06,
239
- "loss": 1.2029,
240
  "step": 310
241
  },
242
  {
243
- "epoch": 5.04,
244
- "learning_rate": 7.90790790790791e-06,
245
- "loss": 1.1463,
246
  "step": 320
247
  },
248
  {
249
- "epoch": 5.05,
250
- "learning_rate": 7.807807807807808e-06,
251
- "loss": 1.2336,
252
  "step": 330
253
  },
254
  {
255
- "epoch": 5.05,
256
- "eval_accuracy": 0.6405529953917051,
257
- "eval_loss": 1.028122067451477,
258
- "eval_runtime": 165.6809,
259
- "eval_samples_per_second": 1.31,
260
- "eval_steps_per_second": 0.169,
261
  "step": 336
262
  },
263
  {
264
  "epoch": 6.0,
265
- "learning_rate": 7.707707707707708e-06,
266
- "loss": 1.224,
267
  "step": 340
268
  },
269
  {
270
  "epoch": 6.01,
271
- "learning_rate": 7.607607607607608e-06,
272
- "loss": 1.1512,
273
  "step": 350
274
  },
275
  {
276
- "epoch": 6.02,
277
- "learning_rate": 7.507507507507507e-06,
278
- "loss": 1.1913,
279
  "step": 360
280
  },
281
  {
282
- "epoch": 6.03,
283
- "learning_rate": 7.4074074074074075e-06,
284
- "loss": 1.0144,
285
  "step": 370
286
  },
287
  {
288
- "epoch": 6.04,
289
- "learning_rate": 7.307307307307308e-06,
290
- "loss": 1.0578,
291
  "step": 380
292
  },
293
  {
294
- "epoch": 6.05,
295
- "learning_rate": 7.207207207207208e-06,
296
- "loss": 1.0726,
297
  "step": 390
298
  },
299
  {
300
- "epoch": 6.05,
301
- "eval_accuracy": 0.663594470046083,
302
- "eval_loss": 0.9172731637954712,
303
- "eval_runtime": 166.3821,
304
- "eval_samples_per_second": 1.304,
305
- "eval_steps_per_second": 0.168,
306
  "step": 392
307
  },
308
  {
309
- "epoch": 7.01,
310
- "learning_rate": 7.107107107107107e-06,
311
- "loss": 1.084,
312
  "step": 400
313
  },
314
  {
315
- "epoch": 7.02,
316
- "learning_rate": 7.007007007007007e-06,
317
- "loss": 1.0031,
318
  "step": 410
319
  },
320
  {
321
- "epoch": 7.03,
322
- "learning_rate": 6.906906906906907e-06,
323
- "loss": 0.997,
324
  "step": 420
325
  },
326
  {
327
- "epoch": 7.03,
328
- "learning_rate": 6.8068068068068075e-06,
329
- "loss": 1.1918,
330
  "step": 430
331
  },
332
  {
333
- "epoch": 7.04,
334
- "learning_rate": 6.706706706706707e-06,
335
- "loss": 1.0966,
336
  "step": 440
337
  },
338
  {
339
- "epoch": 7.05,
340
- "eval_accuracy": 0.5990783410138248,
341
- "eval_loss": 1.0597409009933472,
342
- "eval_runtime": 164.5381,
343
- "eval_samples_per_second": 1.319,
344
- "eval_steps_per_second": 0.17,
345
  "step": 448
346
  },
347
  {
348
  "epoch": 8.0,
349
- "learning_rate": 6.606606606606607e-06,
350
- "loss": 1.1823,
351
  "step": 450
352
  },
353
  {
354
  "epoch": 8.01,
355
- "learning_rate": 6.506506506506507e-06,
356
- "loss": 0.9391,
357
  "step": 460
358
  },
359
  {
360
- "epoch": 8.02,
361
- "learning_rate": 6.406406406406407e-06,
362
- "loss": 1.1039,
363
  "step": 470
364
  },
365
  {
366
- "epoch": 8.03,
367
- "learning_rate": 6.3063063063063065e-06,
368
- "loss": 1.1439,
369
  "step": 480
370
  },
371
  {
372
- "epoch": 8.04,
373
- "learning_rate": 6.206206206206207e-06,
374
- "loss": 0.9368,
375
  "step": 490
376
  },
377
  {
378
- "epoch": 8.05,
379
- "learning_rate": 6.106106106106107e-06,
380
- "loss": 0.956,
381
  "step": 500
382
  },
383
  {
384
- "epoch": 8.05,
385
- "eval_accuracy": 0.6682027649769585,
386
- "eval_loss": 0.9277688264846802,
387
- "eval_runtime": 165.822,
388
- "eval_samples_per_second": 1.309,
389
  "eval_steps_per_second": 0.169,
390
  "step": 504
391
  },
392
  {
393
- "epoch": 9.01,
394
- "learning_rate": 6.006006006006007e-06,
395
- "loss": 0.9728,
396
  "step": 510
397
  },
398
  {
399
  "epoch": 9.01,
400
- "learning_rate": 5.905905905905906e-06,
401
- "loss": 1.1136,
402
  "step": 520
403
  },
404
  {
405
- "epoch": 9.02,
406
- "learning_rate": 5.805805805805806e-06,
407
- "loss": 0.9854,
408
  "step": 530
409
  },
410
  {
411
- "epoch": 9.03,
412
- "learning_rate": 5.7057057057057065e-06,
413
- "loss": 0.9559,
414
  "step": 540
415
  },
416
  {
417
- "epoch": 9.04,
418
- "learning_rate": 5.605605605605607e-06,
419
- "loss": 0.9154,
420
  "step": 550
421
  },
422
  {
423
- "epoch": 9.05,
424
- "learning_rate": 5.505505505505506e-06,
425
- "loss": 1.0813,
426
  "step": 560
427
  },
428
  {
429
- "epoch": 9.05,
430
- "eval_accuracy": 0.5714285714285714,
431
- "eval_loss": 1.0024998188018799,
432
- "eval_runtime": 167.438,
433
- "eval_samples_per_second": 1.296,
434
- "eval_steps_per_second": 0.167,
435
  "step": 560
436
  },
437
  {
438
- "epoch": 10.01,
439
- "learning_rate": 5.405405405405406e-06,
440
- "loss": 0.949,
441
  "step": 570
442
  },
443
  {
444
- "epoch": 10.02,
445
- "learning_rate": 5.305305305305306e-06,
446
- "loss": 0.9489,
447
  "step": 580
448
  },
449
  {
450
- "epoch": 10.03,
451
- "learning_rate": 5.205205205205206e-06,
452
- "loss": 1.0595,
453
  "step": 590
454
  },
455
  {
456
- "epoch": 10.04,
457
- "learning_rate": 5.105105105105106e-06,
458
- "loss": 0.8802,
459
  "step": 600
460
  },
461
  {
462
- "epoch": 10.05,
463
- "learning_rate": 5.005005005005006e-06,
464
- "loss": 0.8996,
465
  "step": 610
466
  },
467
  {
468
- "epoch": 10.05,
469
- "eval_accuracy": 0.7050691244239631,
470
- "eval_loss": 0.788213849067688,
471
- "eval_runtime": 164.6073,
472
- "eval_samples_per_second": 1.318,
473
  "eval_steps_per_second": 0.17,
474
  "step": 616
475
  },
476
  {
477
  "epoch": 11.0,
478
- "learning_rate": 4.904904904904905e-06,
479
- "loss": 0.9075,
480
  "step": 620
481
  },
482
  {
483
  "epoch": 11.01,
484
- "learning_rate": 4.804804804804805e-06,
485
- "loss": 0.921,
486
  "step": 630
487
  },
488
  {
489
- "epoch": 11.02,
490
- "learning_rate": 4.704704704704705e-06,
491
- "loss": 0.93,
492
  "step": 640
493
  },
494
  {
495
- "epoch": 11.03,
496
- "learning_rate": 4.604604604604605e-06,
497
- "loss": 0.7948,
498
  "step": 650
499
  },
500
  {
501
- "epoch": 11.04,
502
- "learning_rate": 4.504504504504505e-06,
503
- "loss": 0.9116,
504
  "step": 660
505
  },
506
  {
507
- "epoch": 11.05,
508
- "learning_rate": 4.404404404404405e-06,
509
- "loss": 0.947,
510
  "step": 670
511
  },
512
  {
513
- "epoch": 11.05,
514
- "eval_accuracy": 0.7188940092165899,
515
- "eval_loss": 0.8638418316841125,
516
- "eval_runtime": 164.8412,
517
- "eval_samples_per_second": 1.316,
518
- "eval_steps_per_second": 0.17,
519
  "step": 672
520
  },
521
  {
522
- "epoch": 12.01,
523
- "learning_rate": 4.304304304304305e-06,
524
- "loss": 0.9901,
525
  "step": 680
526
  },
527
  {
528
- "epoch": 12.02,
529
- "learning_rate": 4.204204204204204e-06,
530
- "loss": 0.8267,
531
  "step": 690
532
  },
533
  {
534
- "epoch": 12.03,
535
- "learning_rate": 4.1041041041041045e-06,
536
- "loss": 0.9112,
537
  "step": 700
538
  },
539
  {
540
- "epoch": 12.03,
541
- "learning_rate": 4.004004004004005e-06,
542
- "loss": 0.8341,
543
  "step": 710
544
  },
545
  {
546
- "epoch": 12.04,
547
- "learning_rate": 3.903903903903904e-06,
548
- "loss": 0.9386,
549
  "step": 720
550
  },
551
  {
552
- "epoch": 12.05,
553
- "eval_accuracy": 0.7004608294930875,
554
- "eval_loss": 0.8191553354263306,
555
- "eval_runtime": 164.8134,
556
- "eval_samples_per_second": 1.317,
557
- "eval_steps_per_second": 0.17,
558
  "step": 728
559
  },
560
  {
561
  "epoch": 13.0,
562
- "learning_rate": 3.803803803803804e-06,
563
- "loss": 0.9771,
564
  "step": 730
565
  },
566
  {
567
  "epoch": 13.01,
568
- "learning_rate": 3.7037037037037037e-06,
569
- "loss": 0.8544,
570
  "step": 740
571
  },
572
  {
573
- "epoch": 13.02,
574
- "learning_rate": 3.603603603603604e-06,
575
- "loss": 0.997,
576
  "step": 750
577
  },
578
  {
579
- "epoch": 13.03,
580
- "learning_rate": 3.5035035035035036e-06,
581
- "loss": 0.9852,
582
  "step": 760
583
  },
584
  {
585
- "epoch": 13.04,
586
- "learning_rate": 3.4034034034034037e-06,
587
- "loss": 0.946,
588
  "step": 770
589
  },
590
  {
591
- "epoch": 13.05,
592
- "learning_rate": 3.3033033033033035e-06,
593
- "loss": 0.8754,
594
  "step": 780
595
  },
596
  {
597
- "epoch": 13.05,
598
- "eval_accuracy": 0.695852534562212,
599
- "eval_loss": 0.7741014361381531,
600
- "eval_runtime": 164.9986,
601
  "eval_samples_per_second": 1.315,
602
  "eval_steps_per_second": 0.17,
603
  "step": 784
604
  },
605
  {
606
- "epoch": 14.01,
607
- "learning_rate": 3.2032032032032036e-06,
608
- "loss": 0.9146,
609
  "step": 790
610
  },
611
  {
612
  "epoch": 14.01,
613
- "learning_rate": 3.1031031031031033e-06,
614
- "loss": 0.8221,
615
  "step": 800
616
  },
617
  {
618
- "epoch": 14.02,
619
- "learning_rate": 3.0030030030030034e-06,
620
- "loss": 0.8436,
621
  "step": 810
622
  },
623
  {
624
- "epoch": 14.03,
625
- "learning_rate": 2.902902902902903e-06,
626
- "loss": 0.812,
627
  "step": 820
628
  },
629
  {
630
- "epoch": 14.04,
631
- "learning_rate": 2.8028028028028033e-06,
632
- "loss": 0.8285,
633
  "step": 830
634
  },
635
  {
636
- "epoch": 14.05,
637
- "learning_rate": 2.702702702702703e-06,
638
- "loss": 0.8028,
639
  "step": 840
640
  },
641
  {
642
- "epoch": 14.05,
643
- "eval_accuracy": 0.7096774193548387,
644
- "eval_loss": 0.748151421546936,
645
- "eval_runtime": 165.2783,
646
- "eval_samples_per_second": 1.313,
647
- "eval_steps_per_second": 0.169,
648
  "step": 840
649
  },
650
  {
651
- "epoch": 15.01,
652
- "learning_rate": 2.602602602602603e-06,
653
- "loss": 0.9496,
654
  "step": 850
655
  },
656
  {
657
- "epoch": 15.02,
658
- "learning_rate": 2.502502502502503e-06,
659
- "loss": 0.8661,
660
  "step": 860
661
  },
662
  {
663
- "epoch": 15.03,
664
- "learning_rate": 2.4024024024024026e-06,
665
- "loss": 0.8948,
666
  "step": 870
667
  },
668
  {
669
- "epoch": 15.04,
670
- "learning_rate": 2.3023023023023023e-06,
671
- "loss": 0.7513,
672
  "step": 880
673
  },
674
  {
675
- "epoch": 15.05,
676
- "learning_rate": 2.2022022022022024e-06,
677
- "loss": 0.7038,
678
  "step": 890
679
  },
680
  {
681
- "epoch": 15.05,
682
- "eval_accuracy": 0.7327188940092166,
683
- "eval_loss": 0.7463698983192444,
684
- "eval_runtime": 165.2407,
685
- "eval_samples_per_second": 1.313,
686
  "eval_steps_per_second": 0.169,
687
  "step": 896
688
  },
689
  {
690
  "epoch": 16.0,
691
- "learning_rate": 2.102102102102102e-06,
692
- "loss": 0.8482,
693
  "step": 900
694
  },
695
  {
696
  "epoch": 16.01,
697
- "learning_rate": 2.0020020020020023e-06,
698
- "loss": 0.7576,
699
  "step": 910
700
  },
701
  {
702
- "epoch": 16.02,
703
- "learning_rate": 1.901901901901902e-06,
704
- "loss": 0.8506,
705
  "step": 920
706
  },
707
  {
708
- "epoch": 16.03,
709
- "learning_rate": 1.801801801801802e-06,
710
- "loss": 0.8435,
711
  "step": 930
712
  },
713
  {
714
- "epoch": 16.04,
715
- "learning_rate": 1.7017017017017019e-06,
716
- "loss": 0.9429,
717
  "step": 940
718
  },
719
  {
720
- "epoch": 16.05,
721
- "learning_rate": 1.6016016016016018e-06,
722
- "loss": 0.6302,
723
  "step": 950
724
  },
725
  {
726
- "epoch": 16.05,
727
- "eval_accuracy": 0.7004608294930875,
728
- "eval_loss": 0.7732542753219604,
729
- "eval_runtime": 166.0375,
730
- "eval_samples_per_second": 1.307,
731
- "eval_steps_per_second": 0.169,
732
  "step": 952
733
  },
734
  {
735
- "epoch": 17.01,
736
- "learning_rate": 1.5015015015015017e-06,
737
- "loss": 0.8215,
738
  "step": 960
739
  },
740
  {
741
- "epoch": 17.02,
742
- "learning_rate": 1.4014014014014016e-06,
743
- "loss": 0.7503,
744
  "step": 970
745
  },
746
  {
747
- "epoch": 17.03,
748
- "learning_rate": 1.3013013013013016e-06,
749
- "loss": 0.8131,
750
  "step": 980
751
  },
752
  {
753
- "epoch": 17.03,
754
- "learning_rate": 1.2012012012012013e-06,
755
- "loss": 0.7954,
756
  "step": 990
757
  },
758
  {
759
- "epoch": 17.04,
760
- "learning_rate": 1.1011011011011012e-06,
761
- "loss": 0.8387,
762
  "step": 1000
763
  },
764
  {
765
- "epoch": 17.05,
766
- "eval_accuracy": 0.7235023041474654,
767
- "eval_loss": 0.722377598285675,
768
- "eval_runtime": 164.6978,
769
- "eval_samples_per_second": 1.318,
770
  "eval_steps_per_second": 0.17,
771
  "step": 1008
772
  },
773
  {
774
  "epoch": 18.0,
775
- "learning_rate": 1.0010010010010011e-06,
776
- "loss": 0.9197,
777
  "step": 1010
778
  },
779
  {
780
  "epoch": 18.01,
781
- "learning_rate": 9.00900900900901e-07,
782
- "loss": 0.8602,
783
  "step": 1020
784
  },
785
  {
786
- "epoch": 18.02,
787
- "learning_rate": 8.008008008008009e-07,
788
- "loss": 0.706,
789
  "step": 1030
790
  },
791
  {
792
- "epoch": 18.03,
793
- "learning_rate": 7.007007007007008e-07,
794
- "loss": 0.878,
795
  "step": 1040
796
  },
797
  {
798
- "epoch": 18.04,
799
- "learning_rate": 6.006006006006006e-07,
800
- "loss": 0.8754,
801
  "step": 1050
802
  },
803
  {
804
- "epoch": 18.05,
805
- "learning_rate": 5.005005005005006e-07,
806
- "loss": 0.5853,
807
  "step": 1060
808
  },
809
  {
810
- "epoch": 18.05,
811
- "eval_accuracy": 0.7142857142857143,
812
- "eval_loss": 0.7359205484390259,
813
- "eval_runtime": 165.3361,
814
- "eval_samples_per_second": 1.312,
815
- "eval_steps_per_second": 0.169,
816
  "step": 1064
817
  },
818
  {
819
- "epoch": 19.01,
820
- "learning_rate": 4.0040040040040045e-07,
821
- "loss": 0.7846,
822
  "step": 1070
823
  },
824
  {
825
  "epoch": 19.01,
826
- "learning_rate": 3.003003003003003e-07,
827
- "loss": 0.9235,
828
  "step": 1080
829
  },
830
  {
831
- "epoch": 19.02,
832
- "learning_rate": 2.0020020020020022e-07,
833
- "loss": 0.9745,
834
  "step": 1090
835
  },
836
  {
837
- "epoch": 19.03,
838
- "learning_rate": 1.0010010010010011e-07,
839
- "loss": 0.6344,
840
  "step": 1100
841
  },
842
  {
843
- "epoch": 19.04,
844
- "learning_rate": 0.0,
845
- "loss": 0.7482,
846
  "step": 1110
847
  },
848
  {
849
- "epoch": 19.04,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
850
  "eval_accuracy": 0.7188940092165899,
851
- "eval_loss": 0.7369951605796814,
852
- "eval_runtime": 166.4385,
853
- "eval_samples_per_second": 1.304,
854
- "eval_steps_per_second": 0.168,
855
- "step": 1110
856
  },
857
  {
858
- "epoch": 19.04,
859
- "step": 1110,
860
- "total_flos": 1.1041673732998595e+19,
861
- "train_loss": 1.081317645150262,
862
- "train_runtime": 11661.5957,
863
- "train_samples_per_second": 0.761,
864
- "train_steps_per_second": 0.095
865
  },
866
  {
867
- "epoch": 19.04,
868
- "eval_accuracy": 0.7129629629629629,
869
- "eval_loss": 0.7363542914390564,
870
- "eval_runtime": 167.4287,
871
- "eval_samples_per_second": 1.29,
872
- "eval_steps_per_second": 0.161,
873
- "step": 1110
874
  },
875
  {
876
- "epoch": 19.04,
877
- "eval_accuracy": 0.7129629629629629,
878
- "eval_loss": 0.7363542914390564,
879
- "eval_runtime": 168.5999,
880
- "eval_samples_per_second": 1.281,
881
- "eval_steps_per_second": 0.16,
882
- "step": 1110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
883
  }
884
  ],
885
  "logging_steps": 10,
886
- "max_steps": 1110,
887
  "num_input_tokens_seen": 0,
888
  "num_train_epochs": 9223372036854775807,
889
  "save_steps": 500,
890
- "total_flos": 1.1041673732998595e+19,
891
  "train_batch_size": 8,
892
  "trial_name": null,
893
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.728110599078341,
3
+ "best_model_checkpoint": "videomae-base-finetuned-subset-check10/checkpoint-1960",
4
+ "epoch": 39.016216216216215,
5
  "eval_steps": 500,
6
+ "global_step": 2220,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0,
13
+ "learning_rate": 4.504504504504505e-07,
14
+ "loss": 1.5494,
15
  "step": 10
16
  },
17
  {
18
+ "epoch": 0.01,
19
+ "learning_rate": 9.00900900900901e-07,
20
+ "loss": 1.5329,
21
  "step": 20
22
  },
23
  {
24
+ "epoch": 0.01,
25
+ "learning_rate": 1.3513513513513515e-06,
26
+ "loss": 1.5277,
27
  "step": 30
28
  },
29
  {
30
+ "epoch": 0.02,
31
+ "learning_rate": 1.801801801801802e-06,
32
+ "loss": 1.5543,
33
  "step": 40
34
  },
35
  {
36
+ "epoch": 0.02,
37
+ "learning_rate": 2.2522522522522524e-06,
38
+ "loss": 1.5175,
39
  "step": 50
40
  },
41
  {
42
+ "epoch": 0.03,
43
+ "eval_accuracy": 0.2073732718894009,
44
+ "eval_loss": 1.6040809154510498,
45
+ "eval_runtime": 183.7832,
46
+ "eval_samples_per_second": 1.181,
47
+ "eval_steps_per_second": 0.152,
48
  "step": 56
49
  },
50
  {
51
  "epoch": 1.0,
52
+ "learning_rate": 2.702702702702703e-06,
53
+ "loss": 1.4668,
54
  "step": 60
55
  },
56
  {
57
  "epoch": 1.01,
58
+ "learning_rate": 3.1531531531531532e-06,
59
+ "loss": 1.4494,
60
  "step": 70
61
  },
62
  {
63
+ "epoch": 1.01,
64
+ "learning_rate": 3.603603603603604e-06,
65
+ "loss": 1.5305,
66
  "step": 80
67
  },
68
  {
69
+ "epoch": 1.02,
70
+ "learning_rate": 4.0540540540540545e-06,
71
+ "loss": 1.4669,
72
  "step": 90
73
  },
74
  {
75
+ "epoch": 1.02,
76
+ "learning_rate": 4.504504504504505e-06,
77
+ "loss": 1.4467,
78
  "step": 100
79
  },
80
  {
81
+ "epoch": 1.02,
82
+ "learning_rate": 4.954954954954955e-06,
83
+ "loss": 1.4397,
84
  "step": 110
85
  },
86
  {
87
+ "epoch": 1.03,
88
+ "eval_accuracy": 0.3870967741935484,
89
+ "eval_loss": 1.4559013843536377,
90
+ "eval_runtime": 165.5917,
91
+ "eval_samples_per_second": 1.31,
92
+ "eval_steps_per_second": 0.169,
93
  "step": 112
94
  },
95
  {
96
+ "epoch": 2.0,
97
+ "learning_rate": 5.405405405405406e-06,
98
+ "loss": 1.3919,
99
  "step": 120
100
  },
101
  {
102
+ "epoch": 2.01,
103
+ "learning_rate": 5.855855855855856e-06,
104
+ "loss": 1.4257,
105
  "step": 130
106
  },
107
  {
108
+ "epoch": 2.01,
109
+ "learning_rate": 6.3063063063063065e-06,
110
+ "loss": 1.402,
111
  "step": 140
112
  },
113
  {
114
+ "epoch": 2.02,
115
+ "learning_rate": 6.7567567567567575e-06,
116
+ "loss": 1.3609,
117
  "step": 150
118
  },
119
  {
120
+ "epoch": 2.02,
121
+ "learning_rate": 7.207207207207208e-06,
122
+ "loss": 1.464,
123
  "step": 160
124
  },
125
  {
126
+ "epoch": 2.03,
127
+ "eval_accuracy": 0.39631336405529954,
128
+ "eval_loss": 1.3637099266052246,
129
+ "eval_runtime": 165.2932,
130
+ "eval_samples_per_second": 1.313,
131
+ "eval_steps_per_second": 0.169,
132
  "step": 168
133
  },
134
  {
135
  "epoch": 3.0,
136
+ "learning_rate": 7.657657657657658e-06,
137
+ "loss": 1.3384,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 3.01,
142
+ "learning_rate": 8.108108108108109e-06,
143
+ "loss": 1.4317,
144
  "step": 180
145
  },
146
  {
147
+ "epoch": 3.01,
148
+ "learning_rate": 8.55855855855856e-06,
149
+ "loss": 1.1669,
150
  "step": 190
151
  },
152
  {
153
+ "epoch": 3.01,
154
+ "learning_rate": 9.00900900900901e-06,
155
+ "loss": 1.4449,
156
  "step": 200
157
  },
158
  {
159
+ "epoch": 3.02,
160
+ "learning_rate": 9.45945945945946e-06,
161
+ "loss": 1.3429,
162
  "step": 210
163
  },
164
  {
165
+ "epoch": 3.02,
166
+ "learning_rate": 9.90990990990991e-06,
167
+ "loss": 1.3404,
168
  "step": 220
169
  },
170
  {
171
+ "epoch": 3.03,
172
+ "eval_accuracy": 0.4470046082949309,
173
+ "eval_loss": 1.2467495203018188,
174
+ "eval_runtime": 166.7691,
175
+ "eval_samples_per_second": 1.301,
176
+ "eval_steps_per_second": 0.168,
177
  "step": 224
178
  },
179
  {
180
+ "epoch": 4.0,
181
+ "learning_rate": 9.95995995995996e-06,
182
+ "loss": 1.2682,
183
  "step": 230
184
  },
185
  {
186
  "epoch": 4.01,
187
+ "learning_rate": 9.90990990990991e-06,
188
+ "loss": 1.2307,
189
  "step": 240
190
  },
191
  {
192
+ "epoch": 4.01,
193
+ "learning_rate": 9.85985985985986e-06,
194
+ "loss": 1.14,
195
  "step": 250
196
  },
197
  {
198
+ "epoch": 4.02,
199
+ "learning_rate": 9.80980980980981e-06,
200
+ "loss": 1.091,
201
  "step": 260
202
  },
203
  {
204
+ "epoch": 4.02,
205
+ "learning_rate": 9.75975975975976e-06,
206
+ "loss": 1.1822,
207
  "step": 270
208
  },
209
  {
210
+ "epoch": 4.03,
211
+ "learning_rate": 9.70970970970971e-06,
212
+ "loss": 1.3284,
213
  "step": 280
214
  },
215
  {
216
+ "epoch": 4.03,
217
+ "eval_accuracy": 0.3317972350230415,
218
+ "eval_loss": 1.3115123510360718,
219
+ "eval_runtime": 165.1237,
220
+ "eval_samples_per_second": 1.314,
221
+ "eval_steps_per_second": 0.17,
222
  "step": 280
223
  },
224
  {
225
+ "epoch": 5.0,
226
+ "learning_rate": 9.65965965965966e-06,
227
+ "loss": 1.1944,
228
  "step": 290
229
  },
230
  {
231
+ "epoch": 5.01,
232
+ "learning_rate": 9.60960960960961e-06,
233
+ "loss": 1.3055,
234
  "step": 300
235
  },
236
  {
237
+ "epoch": 5.01,
238
+ "learning_rate": 9.55955955955956e-06,
239
+ "loss": 1.0722,
240
  "step": 310
241
  },
242
  {
243
+ "epoch": 5.02,
244
+ "learning_rate": 9.50950950950951e-06,
245
+ "loss": 1.0467,
246
  "step": 320
247
  },
248
  {
249
+ "epoch": 5.02,
250
+ "learning_rate": 9.45945945945946e-06,
251
+ "loss": 1.1598,
252
  "step": 330
253
  },
254
  {
255
+ "epoch": 5.03,
256
+ "eval_accuracy": 0.4470046082949309,
257
+ "eval_loss": 1.2488903999328613,
258
+ "eval_runtime": 168.3906,
259
+ "eval_samples_per_second": 1.289,
260
+ "eval_steps_per_second": 0.166,
261
  "step": 336
262
  },
263
  {
264
  "epoch": 6.0,
265
+ "learning_rate": 9.40940940940941e-06,
266
+ "loss": 1.2285,
267
  "step": 340
268
  },
269
  {
270
  "epoch": 6.01,
271
+ "learning_rate": 9.35935935935936e-06,
272
+ "loss": 1.0002,
273
  "step": 350
274
  },
275
  {
276
+ "epoch": 6.01,
277
+ "learning_rate": 9.30930930930931e-06,
278
+ "loss": 1.2506,
279
  "step": 360
280
  },
281
  {
282
+ "epoch": 6.02,
283
+ "learning_rate": 9.25925925925926e-06,
284
+ "loss": 1.0963,
285
  "step": 370
286
  },
287
  {
288
+ "epoch": 6.02,
289
+ "learning_rate": 9.20920920920921e-06,
290
+ "loss": 1.1495,
291
  "step": 380
292
  },
293
  {
294
+ "epoch": 6.02,
295
+ "learning_rate": 9.15915915915916e-06,
296
+ "loss": 0.9615,
297
  "step": 390
298
  },
299
  {
300
+ "epoch": 6.03,
301
+ "eval_accuracy": 0.4009216589861751,
302
+ "eval_loss": 1.3057225942611694,
303
+ "eval_runtime": 165.4477,
304
+ "eval_samples_per_second": 1.312,
305
+ "eval_steps_per_second": 0.169,
306
  "step": 392
307
  },
308
  {
309
+ "epoch": 7.0,
310
+ "learning_rate": 9.10910910910911e-06,
311
+ "loss": 1.0211,
312
  "step": 400
313
  },
314
  {
315
+ "epoch": 7.01,
316
+ "learning_rate": 9.05905905905906e-06,
317
+ "loss": 1.0622,
318
  "step": 410
319
  },
320
  {
321
+ "epoch": 7.01,
322
+ "learning_rate": 9.00900900900901e-06,
323
+ "loss": 1.1759,
324
  "step": 420
325
  },
326
  {
327
+ "epoch": 7.02,
328
+ "learning_rate": 8.95895895895896e-06,
329
+ "loss": 1.0414,
330
  "step": 430
331
  },
332
  {
333
+ "epoch": 7.02,
334
+ "learning_rate": 8.90890890890891e-06,
335
+ "loss": 0.9357,
336
  "step": 440
337
  },
338
  {
339
+ "epoch": 7.03,
340
+ "eval_accuracy": 0.6497695852534562,
341
+ "eval_loss": 0.9200783967971802,
342
+ "eval_runtime": 165.3979,
343
+ "eval_samples_per_second": 1.312,
344
+ "eval_steps_per_second": 0.169,
345
  "step": 448
346
  },
347
  {
348
  "epoch": 8.0,
349
+ "learning_rate": 8.85885885885886e-06,
350
+ "loss": 1.2506,
351
  "step": 450
352
  },
353
  {
354
  "epoch": 8.01,
355
+ "learning_rate": 8.80880880880881e-06,
356
+ "loss": 1.0342,
357
  "step": 460
358
  },
359
  {
360
+ "epoch": 8.01,
361
+ "learning_rate": 8.75875875875876e-06,
362
+ "loss": 1.0815,
363
  "step": 470
364
  },
365
  {
366
+ "epoch": 8.01,
367
+ "learning_rate": 8.70870870870871e-06,
368
+ "loss": 1.0373,
369
  "step": 480
370
  },
371
  {
372
+ "epoch": 8.02,
373
+ "learning_rate": 8.65865865865866e-06,
374
+ "loss": 0.9051,
375
  "step": 490
376
  },
377
  {
378
+ "epoch": 8.02,
379
+ "learning_rate": 8.60860860860861e-06,
380
+ "loss": 0.9785,
381
  "step": 500
382
  },
383
  {
384
+ "epoch": 8.03,
385
+ "eval_accuracy": 0.6774193548387096,
386
+ "eval_loss": 0.8629115223884583,
387
+ "eval_runtime": 165.2531,
388
+ "eval_samples_per_second": 1.313,
389
  "eval_steps_per_second": 0.169,
390
  "step": 504
391
  },
392
  {
393
+ "epoch": 9.0,
394
+ "learning_rate": 8.55855855855856e-06,
395
+ "loss": 0.9915,
396
  "step": 510
397
  },
398
  {
399
  "epoch": 9.01,
400
+ "learning_rate": 8.50850850850851e-06,
401
+ "loss": 0.9176,
402
  "step": 520
403
  },
404
  {
405
+ "epoch": 9.01,
406
+ "learning_rate": 8.45845845845846e-06,
407
+ "loss": 1.1561,
408
  "step": 530
409
  },
410
  {
411
+ "epoch": 9.02,
412
+ "learning_rate": 8.408408408408409e-06,
413
+ "loss": 0.8966,
414
  "step": 540
415
  },
416
  {
417
+ "epoch": 9.02,
418
+ "learning_rate": 8.358358358358359e-06,
419
+ "loss": 0.894,
420
  "step": 550
421
  },
422
  {
423
+ "epoch": 9.03,
424
+ "learning_rate": 8.308308308308309e-06,
425
+ "loss": 1.0862,
426
  "step": 560
427
  },
428
  {
429
+ "epoch": 9.03,
430
+ "eval_accuracy": 0.5069124423963134,
431
+ "eval_loss": 1.0976766347885132,
432
+ "eval_runtime": 165.1522,
433
+ "eval_samples_per_second": 1.314,
434
+ "eval_steps_per_second": 0.17,
435
  "step": 560
436
  },
437
  {
438
+ "epoch": 10.0,
439
+ "learning_rate": 8.258258258258259e-06,
440
+ "loss": 0.7806,
441
  "step": 570
442
  },
443
  {
444
+ "epoch": 10.01,
445
+ "learning_rate": 8.208208208208209e-06,
446
+ "loss": 0.9904,
447
  "step": 580
448
  },
449
  {
450
+ "epoch": 10.01,
451
+ "learning_rate": 8.158158158158159e-06,
452
+ "loss": 0.9099,
453
  "step": 590
454
  },
455
  {
456
+ "epoch": 10.02,
457
+ "learning_rate": 8.108108108108109e-06,
458
+ "loss": 0.9169,
459
  "step": 600
460
  },
461
  {
462
+ "epoch": 10.02,
463
+ "learning_rate": 8.058058058058059e-06,
464
+ "loss": 0.9315,
465
  "step": 610
466
  },
467
  {
468
+ "epoch": 10.03,
469
+ "eval_accuracy": 0.7096774193548387,
470
+ "eval_loss": 0.7867635488510132,
471
+ "eval_runtime": 165.0158,
472
+ "eval_samples_per_second": 1.315,
473
  "eval_steps_per_second": 0.17,
474
  "step": 616
475
  },
476
  {
477
  "epoch": 11.0,
478
+ "learning_rate": 8.00800800800801e-06,
479
+ "loss": 0.7636,
480
  "step": 620
481
  },
482
  {
483
  "epoch": 11.01,
484
+ "learning_rate": 7.95795795795796e-06,
485
+ "loss": 0.9198,
486
  "step": 630
487
  },
488
  {
489
+ "epoch": 11.01,
490
+ "learning_rate": 7.90790790790791e-06,
491
+ "loss": 1.0816,
492
  "step": 640
493
  },
494
  {
495
+ "epoch": 11.02,
496
+ "learning_rate": 7.85785785785786e-06,
497
+ "loss": 0.8409,
498
  "step": 650
499
  },
500
  {
501
+ "epoch": 11.02,
502
+ "learning_rate": 7.807807807807808e-06,
503
+ "loss": 0.7348,
504
  "step": 660
505
  },
506
  {
507
+ "epoch": 11.02,
508
+ "learning_rate": 7.757757757757758e-06,
509
+ "loss": 0.9404,
510
  "step": 670
511
  },
512
  {
513
+ "epoch": 11.03,
514
+ "eval_accuracy": 0.6728110599078341,
515
+ "eval_loss": 0.8170290589332581,
516
+ "eval_runtime": 165.2251,
517
+ "eval_samples_per_second": 1.313,
518
+ "eval_steps_per_second": 0.169,
519
  "step": 672
520
  },
521
  {
522
+ "epoch": 12.0,
523
+ "learning_rate": 7.707707707707708e-06,
524
+ "loss": 0.8669,
525
  "step": 680
526
  },
527
  {
528
+ "epoch": 12.01,
529
+ "learning_rate": 7.657657657657658e-06,
530
+ "loss": 0.7773,
531
  "step": 690
532
  },
533
  {
534
+ "epoch": 12.01,
535
+ "learning_rate": 7.607607607607608e-06,
536
+ "loss": 0.7599,
537
  "step": 700
538
  },
539
  {
540
+ "epoch": 12.02,
541
+ "learning_rate": 7.557557557557558e-06,
542
+ "loss": 0.9216,
543
  "step": 710
544
  },
545
  {
546
+ "epoch": 12.02,
547
+ "learning_rate": 7.507507507507507e-06,
548
+ "loss": 0.939,
549
  "step": 720
550
  },
551
  {
552
+ "epoch": 12.03,
553
+ "eval_accuracy": 0.663594470046083,
554
+ "eval_loss": 0.9246166348457336,
555
+ "eval_runtime": 165.7726,
556
+ "eval_samples_per_second": 1.309,
557
+ "eval_steps_per_second": 0.169,
558
  "step": 728
559
  },
560
  {
561
  "epoch": 13.0,
562
+ "learning_rate": 7.457457457457457e-06,
563
+ "loss": 0.9703,
564
  "step": 730
565
  },
566
  {
567
  "epoch": 13.01,
568
+ "learning_rate": 7.4074074074074075e-06,
569
+ "loss": 0.6688,
570
  "step": 740
571
  },
572
  {
573
+ "epoch": 13.01,
574
+ "learning_rate": 7.3573573573573575e-06,
575
+ "loss": 0.9097,
576
  "step": 750
577
  },
578
  {
579
+ "epoch": 13.01,
580
+ "learning_rate": 7.307307307307308e-06,
581
+ "loss": 0.9963,
582
  "step": 760
583
  },
584
  {
585
+ "epoch": 13.02,
586
+ "learning_rate": 7.257257257257258e-06,
587
+ "loss": 0.8594,
588
  "step": 770
589
  },
590
  {
591
+ "epoch": 13.02,
592
+ "learning_rate": 7.207207207207208e-06,
593
+ "loss": 0.8205,
594
  "step": 780
595
  },
596
  {
597
+ "epoch": 13.03,
598
+ "eval_accuracy": 0.6866359447004609,
599
+ "eval_loss": 0.8420272469520569,
600
+ "eval_runtime": 165.0282,
601
  "eval_samples_per_second": 1.315,
602
  "eval_steps_per_second": 0.17,
603
  "step": 784
604
  },
605
  {
606
+ "epoch": 14.0,
607
+ "learning_rate": 7.157157157157158e-06,
608
+ "loss": 0.8206,
609
  "step": 790
610
  },
611
  {
612
  "epoch": 14.01,
613
+ "learning_rate": 7.107107107107107e-06,
614
+ "loss": 0.8183,
615
  "step": 800
616
  },
617
  {
618
+ "epoch": 14.01,
619
+ "learning_rate": 7.057057057057057e-06,
620
+ "loss": 0.7596,
621
  "step": 810
622
  },
623
  {
624
+ "epoch": 14.02,
625
+ "learning_rate": 7.007007007007007e-06,
626
+ "loss": 0.799,
627
  "step": 820
628
  },
629
  {
630
+ "epoch": 14.02,
631
+ "learning_rate": 6.956956956956957e-06,
632
+ "loss": 0.7377,
633
  "step": 830
634
  },
635
  {
636
+ "epoch": 14.03,
637
+ "learning_rate": 6.906906906906907e-06,
638
+ "loss": 0.6719,
639
  "step": 840
640
  },
641
  {
642
+ "epoch": 14.03,
643
+ "eval_accuracy": 0.5898617511520737,
644
+ "eval_loss": 1.07249116897583,
645
+ "eval_runtime": 164.1063,
646
+ "eval_samples_per_second": 1.322,
647
+ "eval_steps_per_second": 0.171,
648
  "step": 840
649
  },
650
  {
651
+ "epoch": 15.0,
652
+ "learning_rate": 6.856856856856857e-06,
653
+ "loss": 0.6874,
654
  "step": 850
655
  },
656
  {
657
+ "epoch": 15.01,
658
+ "learning_rate": 6.8068068068068075e-06,
659
+ "loss": 0.8207,
660
  "step": 860
661
  },
662
  {
663
+ "epoch": 15.01,
664
+ "learning_rate": 6.7567567567567575e-06,
665
+ "loss": 0.7454,
666
  "step": 870
667
  },
668
  {
669
+ "epoch": 15.02,
670
+ "learning_rate": 6.706706706706707e-06,
671
+ "loss": 0.7188,
672
  "step": 880
673
  },
674
  {
675
+ "epoch": 15.02,
676
+ "learning_rate": 6.656656656656657e-06,
677
+ "loss": 0.8308,
678
  "step": 890
679
  },
680
  {
681
+ "epoch": 15.03,
682
+ "eval_accuracy": 0.6912442396313364,
683
+ "eval_loss": 0.8682537078857422,
684
+ "eval_runtime": 165.7327,
685
+ "eval_samples_per_second": 1.309,
686
  "eval_steps_per_second": 0.169,
687
  "step": 896
688
  },
689
  {
690
  "epoch": 16.0,
691
+ "learning_rate": 6.606606606606607e-06,
692
+ "loss": 0.891,
693
  "step": 900
694
  },
695
  {
696
  "epoch": 16.01,
697
+ "learning_rate": 6.556556556556557e-06,
698
+ "loss": 0.7597,
699
  "step": 910
700
  },
701
  {
702
+ "epoch": 16.01,
703
+ "learning_rate": 6.506506506506507e-06,
704
+ "loss": 0.8318,
705
  "step": 920
706
  },
707
  {
708
+ "epoch": 16.02,
709
+ "learning_rate": 6.456456456456457e-06,
710
+ "loss": 0.7681,
711
  "step": 930
712
  },
713
  {
714
+ "epoch": 16.02,
715
+ "learning_rate": 6.406406406406407e-06,
716
+ "loss": 1.0094,
717
  "step": 940
718
  },
719
  {
720
+ "epoch": 16.02,
721
+ "learning_rate": 6.356356356356357e-06,
722
+ "loss": 0.7554,
723
  "step": 950
724
  },
725
  {
726
+ "epoch": 16.03,
727
+ "eval_accuracy": 0.5990783410138248,
728
+ "eval_loss": 0.9684067368507385,
729
+ "eval_runtime": 164.5634,
730
+ "eval_samples_per_second": 1.319,
731
+ "eval_steps_per_second": 0.17,
732
  "step": 952
733
  },
734
  {
735
+ "epoch": 17.0,
736
+ "learning_rate": 6.3063063063063065e-06,
737
+ "loss": 0.9087,
738
  "step": 960
739
  },
740
  {
741
+ "epoch": 17.01,
742
+ "learning_rate": 6.2562562562562565e-06,
743
+ "loss": 0.7401,
744
  "step": 970
745
  },
746
  {
747
+ "epoch": 17.01,
748
+ "learning_rate": 6.206206206206207e-06,
749
+ "loss": 0.7611,
750
  "step": 980
751
  },
752
  {
753
+ "epoch": 17.02,
754
+ "learning_rate": 6.156156156156157e-06,
755
+ "loss": 0.6347,
756
  "step": 990
757
  },
758
  {
759
+ "epoch": 17.02,
760
+ "learning_rate": 6.106106106106107e-06,
761
+ "loss": 0.6962,
762
  "step": 1000
763
  },
764
  {
765
+ "epoch": 17.03,
766
+ "eval_accuracy": 0.5483870967741935,
767
+ "eval_loss": 1.1106446981430054,
768
+ "eval_runtime": 165.0022,
769
+ "eval_samples_per_second": 1.315,
770
  "eval_steps_per_second": 0.17,
771
  "step": 1008
772
  },
773
  {
774
  "epoch": 18.0,
775
+ "learning_rate": 6.056056056056057e-06,
776
+ "loss": 0.6861,
777
  "step": 1010
778
  },
779
  {
780
  "epoch": 18.01,
781
+ "learning_rate": 6.006006006006007e-06,
782
+ "loss": 0.7999,
783
  "step": 1020
784
  },
785
  {
786
+ "epoch": 18.01,
787
+ "learning_rate": 5.955955955955957e-06,
788
+ "loss": 0.7412,
789
  "step": 1030
790
  },
791
  {
792
+ "epoch": 18.01,
793
+ "learning_rate": 5.905905905905906e-06,
794
+ "loss": 0.8145,
795
  "step": 1040
796
  },
797
  {
798
+ "epoch": 18.02,
799
+ "learning_rate": 5.855855855855856e-06,
800
+ "loss": 0.6653,
801
  "step": 1050
802
  },
803
  {
804
+ "epoch": 18.02,
805
+ "learning_rate": 5.805805805805806e-06,
806
+ "loss": 0.7995,
807
  "step": 1060
808
  },
809
  {
810
+ "epoch": 18.03,
811
+ "eval_accuracy": 0.6497695852534562,
812
+ "eval_loss": 0.9750950336456299,
813
+ "eval_runtime": 163.7137,
814
+ "eval_samples_per_second": 1.325,
815
+ "eval_steps_per_second": 0.171,
816
  "step": 1064
817
  },
818
  {
819
+ "epoch": 19.0,
820
+ "learning_rate": 5.755755755755756e-06,
821
+ "loss": 0.6923,
822
  "step": 1070
823
  },
824
  {
825
  "epoch": 19.01,
826
+ "learning_rate": 5.7057057057057065e-06,
827
+ "loss": 0.7874,
828
  "step": 1080
829
  },
830
  {
831
+ "epoch": 19.01,
832
+ "learning_rate": 5.6556556556556565e-06,
833
+ "loss": 0.728,
834
  "step": 1090
835
  },
836
  {
837
+ "epoch": 19.02,
838
+ "learning_rate": 5.605605605605607e-06,
839
+ "loss": 0.6329,
840
  "step": 1100
841
  },
842
  {
843
+ "epoch": 19.02,
844
+ "learning_rate": 5.555555555555557e-06,
845
+ "loss": 0.8939,
846
  "step": 1110
847
  },
848
  {
849
+ "epoch": 19.03,
850
+ "learning_rate": 5.505505505505506e-06,
851
+ "loss": 0.8298,
852
+ "step": 1120
853
+ },
854
+ {
855
+ "epoch": 19.03,
856
+ "eval_accuracy": 0.5299539170506913,
857
+ "eval_loss": 1.0630956888198853,
858
+ "eval_runtime": 165.0711,
859
+ "eval_samples_per_second": 1.315,
860
+ "eval_steps_per_second": 0.17,
861
+ "step": 1120
862
+ },
863
+ {
864
+ "epoch": 20.0,
865
+ "learning_rate": 5.455455455455456e-06,
866
+ "loss": 0.7025,
867
+ "step": 1130
868
+ },
869
+ {
870
+ "epoch": 20.01,
871
+ "learning_rate": 5.405405405405406e-06,
872
+ "loss": 0.6251,
873
+ "step": 1140
874
+ },
875
+ {
876
+ "epoch": 20.01,
877
+ "learning_rate": 5.355355355355356e-06,
878
+ "loss": 0.7818,
879
+ "step": 1150
880
+ },
881
+ {
882
+ "epoch": 20.02,
883
+ "learning_rate": 5.305305305305306e-06,
884
+ "loss": 0.8259,
885
+ "step": 1160
886
+ },
887
+ {
888
+ "epoch": 20.02,
889
+ "learning_rate": 5.255255255255256e-06,
890
+ "loss": 0.6607,
891
+ "step": 1170
892
+ },
893
+ {
894
+ "epoch": 20.03,
895
+ "eval_accuracy": 0.6175115207373272,
896
+ "eval_loss": 0.9457883834838867,
897
+ "eval_runtime": 164.6389,
898
+ "eval_samples_per_second": 1.318,
899
+ "eval_steps_per_second": 0.17,
900
+ "step": 1176
901
+ },
902
+ {
903
+ "epoch": 21.0,
904
+ "learning_rate": 5.205205205205206e-06,
905
+ "loss": 0.6344,
906
+ "step": 1180
907
+ },
908
+ {
909
+ "epoch": 21.01,
910
+ "learning_rate": 5.155155155155156e-06,
911
+ "loss": 0.637,
912
+ "step": 1190
913
+ },
914
+ {
915
+ "epoch": 21.01,
916
+ "learning_rate": 5.105105105105106e-06,
917
+ "loss": 0.858,
918
+ "step": 1200
919
+ },
920
+ {
921
+ "epoch": 21.02,
922
+ "learning_rate": 5.055055055055056e-06,
923
+ "loss": 0.6934,
924
+ "step": 1210
925
+ },
926
+ {
927
+ "epoch": 21.02,
928
+ "learning_rate": 5.005005005005006e-06,
929
+ "loss": 0.5714,
930
+ "step": 1220
931
+ },
932
+ {
933
+ "epoch": 21.02,
934
+ "learning_rate": 4.954954954954955e-06,
935
+ "loss": 0.688,
936
+ "step": 1230
937
+ },
938
+ {
939
+ "epoch": 21.03,
940
+ "eval_accuracy": 0.6036866359447005,
941
+ "eval_loss": 1.029563546180725,
942
+ "eval_runtime": 163.856,
943
+ "eval_samples_per_second": 1.324,
944
+ "eval_steps_per_second": 0.171,
945
+ "step": 1232
946
+ },
947
+ {
948
+ "epoch": 22.0,
949
+ "learning_rate": 4.904904904904905e-06,
950
+ "loss": 0.5114,
951
+ "step": 1240
952
+ },
953
+ {
954
+ "epoch": 22.01,
955
+ "learning_rate": 4.854854854854855e-06,
956
+ "loss": 0.6638,
957
+ "step": 1250
958
+ },
959
+ {
960
+ "epoch": 22.01,
961
+ "learning_rate": 4.804804804804805e-06,
962
+ "loss": 0.7833,
963
+ "step": 1260
964
+ },
965
+ {
966
+ "epoch": 22.02,
967
+ "learning_rate": 4.754754754754755e-06,
968
+ "loss": 0.7556,
969
+ "step": 1270
970
+ },
971
+ {
972
+ "epoch": 22.02,
973
+ "learning_rate": 4.704704704704705e-06,
974
+ "loss": 0.5835,
975
+ "step": 1280
976
+ },
977
+ {
978
+ "epoch": 22.03,
979
+ "eval_accuracy": 0.6774193548387096,
980
+ "eval_loss": 0.8947747349739075,
981
+ "eval_runtime": 163.875,
982
+ "eval_samples_per_second": 1.324,
983
+ "eval_steps_per_second": 0.171,
984
+ "step": 1288
985
+ },
986
+ {
987
+ "epoch": 23.0,
988
+ "learning_rate": 4.654654654654655e-06,
989
+ "loss": 0.7209,
990
+ "step": 1290
991
+ },
992
+ {
993
+ "epoch": 23.01,
994
+ "learning_rate": 4.604604604604605e-06,
995
+ "loss": 0.6087,
996
+ "step": 1300
997
+ },
998
+ {
999
+ "epoch": 23.01,
1000
+ "learning_rate": 4.554554554554555e-06,
1001
+ "loss": 0.6529,
1002
+ "step": 1310
1003
+ },
1004
+ {
1005
+ "epoch": 23.01,
1006
+ "learning_rate": 4.504504504504505e-06,
1007
+ "loss": 0.7221,
1008
+ "step": 1320
1009
+ },
1010
+ {
1011
+ "epoch": 23.02,
1012
+ "learning_rate": 4.454454454454455e-06,
1013
+ "loss": 0.6167,
1014
+ "step": 1330
1015
+ },
1016
+ {
1017
+ "epoch": 23.02,
1018
+ "learning_rate": 4.404404404404405e-06,
1019
+ "loss": 0.6987,
1020
+ "step": 1340
1021
+ },
1022
+ {
1023
+ "epoch": 23.03,
1024
  "eval_accuracy": 0.7188940092165899,
1025
+ "eval_loss": 0.7882533669471741,
1026
+ "eval_runtime": 163.4201,
1027
+ "eval_samples_per_second": 1.328,
1028
+ "eval_steps_per_second": 0.171,
1029
+ "step": 1344
1030
  },
1031
  {
1032
+ "epoch": 24.0,
1033
+ "learning_rate": 4.354354354354355e-06,
1034
+ "loss": 0.7234,
1035
+ "step": 1350
 
 
 
1036
  },
1037
  {
1038
+ "epoch": 24.01,
1039
+ "learning_rate": 4.304304304304305e-06,
1040
+ "loss": 0.808,
1041
+ "step": 1360
 
 
 
1042
  },
1043
  {
1044
+ "epoch": 24.01,
1045
+ "learning_rate": 4.254254254254255e-06,
1046
+ "loss": 0.6323,
1047
+ "step": 1370
1048
+ },
1049
+ {
1050
+ "epoch": 24.02,
1051
+ "learning_rate": 4.204204204204204e-06,
1052
+ "loss": 0.5463,
1053
+ "step": 1380
1054
+ },
1055
+ {
1056
+ "epoch": 24.02,
1057
+ "learning_rate": 4.154154154154154e-06,
1058
+ "loss": 0.7978,
1059
+ "step": 1390
1060
+ },
1061
+ {
1062
+ "epoch": 24.03,
1063
+ "learning_rate": 4.1041041041041045e-06,
1064
+ "loss": 0.4979,
1065
+ "step": 1400
1066
+ },
1067
+ {
1068
+ "epoch": 24.03,
1069
+ "eval_accuracy": 0.7188940092165899,
1070
+ "eval_loss": 0.7089133262634277,
1071
+ "eval_runtime": 164.3791,
1072
+ "eval_samples_per_second": 1.32,
1073
+ "eval_steps_per_second": 0.17,
1074
+ "step": 1400
1075
+ },
1076
+ {
1077
+ "epoch": 25.0,
1078
+ "learning_rate": 4.0540540540540545e-06,
1079
+ "loss": 0.4685,
1080
+ "step": 1410
1081
+ },
1082
+ {
1083
+ "epoch": 25.01,
1084
+ "learning_rate": 4.004004004004005e-06,
1085
+ "loss": 0.665,
1086
+ "step": 1420
1087
+ },
1088
+ {
1089
+ "epoch": 25.01,
1090
+ "learning_rate": 3.953953953953955e-06,
1091
+ "loss": 0.6872,
1092
+ "step": 1430
1093
+ },
1094
+ {
1095
+ "epoch": 25.02,
1096
+ "learning_rate": 3.903903903903904e-06,
1097
+ "loss": 0.5854,
1098
+ "step": 1440
1099
+ },
1100
+ {
1101
+ "epoch": 25.02,
1102
+ "learning_rate": 3.853853853853854e-06,
1103
+ "loss": 0.6163,
1104
+ "step": 1450
1105
+ },
1106
+ {
1107
+ "epoch": 25.03,
1108
+ "eval_accuracy": 0.7235023041474654,
1109
+ "eval_loss": 0.7633541226387024,
1110
+ "eval_runtime": 163.5735,
1111
+ "eval_samples_per_second": 1.327,
1112
+ "eval_steps_per_second": 0.171,
1113
+ "step": 1456
1114
+ },
1115
+ {
1116
+ "epoch": 26.0,
1117
+ "learning_rate": 3.803803803803804e-06,
1118
+ "loss": 0.7065,
1119
+ "step": 1460
1120
+ },
1121
+ {
1122
+ "epoch": 26.01,
1123
+ "learning_rate": 3.7537537537537537e-06,
1124
+ "loss": 0.6684,
1125
+ "step": 1470
1126
+ },
1127
+ {
1128
+ "epoch": 26.01,
1129
+ "learning_rate": 3.7037037037037037e-06,
1130
+ "loss": 0.786,
1131
+ "step": 1480
1132
+ },
1133
+ {
1134
+ "epoch": 26.02,
1135
+ "learning_rate": 3.653653653653654e-06,
1136
+ "loss": 0.437,
1137
+ "step": 1490
1138
+ },
1139
+ {
1140
+ "epoch": 26.02,
1141
+ "learning_rate": 3.603603603603604e-06,
1142
+ "loss": 0.6708,
1143
+ "step": 1500
1144
+ },
1145
+ {
1146
+ "epoch": 26.02,
1147
+ "learning_rate": 3.5535535535535535e-06,
1148
+ "loss": 0.6754,
1149
+ "step": 1510
1150
+ },
1151
+ {
1152
+ "epoch": 26.03,
1153
+ "eval_accuracy": 0.6359447004608295,
1154
+ "eval_loss": 0.9443588852882385,
1155
+ "eval_runtime": 163.9867,
1156
+ "eval_samples_per_second": 1.323,
1157
+ "eval_steps_per_second": 0.171,
1158
+ "step": 1512
1159
+ },
1160
+ {
1161
+ "epoch": 27.0,
1162
+ "learning_rate": 3.5035035035035036e-06,
1163
+ "loss": 0.7284,
1164
+ "step": 1520
1165
+ },
1166
+ {
1167
+ "epoch": 27.01,
1168
+ "learning_rate": 3.4534534534534537e-06,
1169
+ "loss": 0.5413,
1170
+ "step": 1530
1171
+ },
1172
+ {
1173
+ "epoch": 27.01,
1174
+ "learning_rate": 3.4034034034034037e-06,
1175
+ "loss": 0.6884,
1176
+ "step": 1540
1177
+ },
1178
+ {
1179
+ "epoch": 27.02,
1180
+ "learning_rate": 3.3533533533533534e-06,
1181
+ "loss": 0.5541,
1182
+ "step": 1550
1183
+ },
1184
+ {
1185
+ "epoch": 27.02,
1186
+ "learning_rate": 3.3033033033033035e-06,
1187
+ "loss": 0.6673,
1188
+ "step": 1560
1189
+ },
1190
+ {
1191
+ "epoch": 27.03,
1192
+ "eval_accuracy": 0.6543778801843319,
1193
+ "eval_loss": 0.8390823602676392,
1194
+ "eval_runtime": 164.572,
1195
+ "eval_samples_per_second": 1.319,
1196
+ "eval_steps_per_second": 0.17,
1197
+ "step": 1568
1198
+ },
1199
+ {
1200
+ "epoch": 28.0,
1201
+ "learning_rate": 3.2532532532532535e-06,
1202
+ "loss": 0.6895,
1203
+ "step": 1570
1204
+ },
1205
+ {
1206
+ "epoch": 28.01,
1207
+ "learning_rate": 3.2032032032032036e-06,
1208
+ "loss": 0.553,
1209
+ "step": 1580
1210
+ },
1211
+ {
1212
+ "epoch": 28.01,
1213
+ "learning_rate": 3.1531531531531532e-06,
1214
+ "loss": 0.4311,
1215
+ "step": 1590
1216
+ },
1217
+ {
1218
+ "epoch": 28.01,
1219
+ "learning_rate": 3.1031031031031033e-06,
1220
+ "loss": 0.6969,
1221
+ "step": 1600
1222
+ },
1223
+ {
1224
+ "epoch": 28.02,
1225
+ "learning_rate": 3.0530530530530534e-06,
1226
+ "loss": 0.7507,
1227
+ "step": 1610
1228
+ },
1229
+ {
1230
+ "epoch": 28.02,
1231
+ "learning_rate": 3.0030030030030034e-06,
1232
+ "loss": 0.4924,
1233
+ "step": 1620
1234
+ },
1235
+ {
1236
+ "epoch": 28.03,
1237
+ "eval_accuracy": 0.6682027649769585,
1238
+ "eval_loss": 0.8288503885269165,
1239
+ "eval_runtime": 164.6437,
1240
+ "eval_samples_per_second": 1.318,
1241
+ "eval_steps_per_second": 0.17,
1242
+ "step": 1624
1243
+ },
1244
+ {
1245
+ "epoch": 29.0,
1246
+ "learning_rate": 2.952952952952953e-06,
1247
+ "loss": 0.7695,
1248
+ "step": 1630
1249
+ },
1250
+ {
1251
+ "epoch": 29.01,
1252
+ "learning_rate": 2.902902902902903e-06,
1253
+ "loss": 0.5981,
1254
+ "step": 1640
1255
+ },
1256
+ {
1257
+ "epoch": 29.01,
1258
+ "learning_rate": 2.8528528528528532e-06,
1259
+ "loss": 0.613,
1260
+ "step": 1650
1261
+ },
1262
+ {
1263
+ "epoch": 29.02,
1264
+ "learning_rate": 2.8028028028028033e-06,
1265
+ "loss": 0.5841,
1266
+ "step": 1660
1267
+ },
1268
+ {
1269
+ "epoch": 29.02,
1270
+ "learning_rate": 2.752752752752753e-06,
1271
+ "loss": 0.7704,
1272
+ "step": 1670
1273
+ },
1274
+ {
1275
+ "epoch": 29.03,
1276
+ "learning_rate": 2.702702702702703e-06,
1277
+ "loss": 0.6438,
1278
+ "step": 1680
1279
+ },
1280
+ {
1281
+ "epoch": 29.03,
1282
+ "eval_accuracy": 0.6129032258064516,
1283
+ "eval_loss": 0.9605286121368408,
1284
+ "eval_runtime": 164.5062,
1285
+ "eval_samples_per_second": 1.319,
1286
+ "eval_steps_per_second": 0.17,
1287
+ "step": 1680
1288
+ },
1289
+ {
1290
+ "epoch": 30.0,
1291
+ "learning_rate": 2.652652652652653e-06,
1292
+ "loss": 0.583,
1293
+ "step": 1690
1294
+ },
1295
+ {
1296
+ "epoch": 30.01,
1297
+ "learning_rate": 2.602602602602603e-06,
1298
+ "loss": 0.7539,
1299
+ "step": 1700
1300
+ },
1301
+ {
1302
+ "epoch": 30.01,
1303
+ "learning_rate": 2.552552552552553e-06,
1304
+ "loss": 0.5418,
1305
+ "step": 1710
1306
+ },
1307
+ {
1308
+ "epoch": 30.02,
1309
+ "learning_rate": 2.502502502502503e-06,
1310
+ "loss": 0.6689,
1311
+ "step": 1720
1312
+ },
1313
+ {
1314
+ "epoch": 30.02,
1315
+ "learning_rate": 2.4524524524524525e-06,
1316
+ "loss": 0.5714,
1317
+ "step": 1730
1318
+ },
1319
+ {
1320
+ "epoch": 30.03,
1321
+ "eval_accuracy": 0.6451612903225806,
1322
+ "eval_loss": 0.8838080763816833,
1323
+ "eval_runtime": 163.4686,
1324
+ "eval_samples_per_second": 1.327,
1325
+ "eval_steps_per_second": 0.171,
1326
+ "step": 1736
1327
+ },
1328
+ {
1329
+ "epoch": 31.0,
1330
+ "learning_rate": 2.4024024024024026e-06,
1331
+ "loss": 0.4737,
1332
+ "step": 1740
1333
+ },
1334
+ {
1335
+ "epoch": 31.01,
1336
+ "learning_rate": 2.3523523523523527e-06,
1337
+ "loss": 0.6563,
1338
+ "step": 1750
1339
+ },
1340
+ {
1341
+ "epoch": 31.01,
1342
+ "learning_rate": 2.3023023023023023e-06,
1343
+ "loss": 0.4469,
1344
+ "step": 1760
1345
+ },
1346
+ {
1347
+ "epoch": 31.02,
1348
+ "learning_rate": 2.2522522522522524e-06,
1349
+ "loss": 0.5655,
1350
+ "step": 1770
1351
+ },
1352
+ {
1353
+ "epoch": 31.02,
1354
+ "learning_rate": 2.2022022022022024e-06,
1355
+ "loss": 0.5421,
1356
+ "step": 1780
1357
+ },
1358
+ {
1359
+ "epoch": 31.02,
1360
+ "learning_rate": 2.1521521521521525e-06,
1361
+ "loss": 0.6726,
1362
+ "step": 1790
1363
+ },
1364
+ {
1365
+ "epoch": 31.03,
1366
+ "eval_accuracy": 0.6589861751152074,
1367
+ "eval_loss": 0.8412278890609741,
1368
+ "eval_runtime": 164.9282,
1369
+ "eval_samples_per_second": 1.316,
1370
+ "eval_steps_per_second": 0.17,
1371
+ "step": 1792
1372
+ },
1373
+ {
1374
+ "epoch": 32.0,
1375
+ "learning_rate": 2.102102102102102e-06,
1376
+ "loss": 0.7254,
1377
+ "step": 1800
1378
+ },
1379
+ {
1380
+ "epoch": 32.01,
1381
+ "learning_rate": 2.0520520520520522e-06,
1382
+ "loss": 0.5397,
1383
+ "step": 1810
1384
+ },
1385
+ {
1386
+ "epoch": 32.01,
1387
+ "learning_rate": 2.0020020020020023e-06,
1388
+ "loss": 0.596,
1389
+ "step": 1820
1390
+ },
1391
+ {
1392
+ "epoch": 32.02,
1393
+ "learning_rate": 1.951951951951952e-06,
1394
+ "loss": 0.5603,
1395
+ "step": 1830
1396
+ },
1397
+ {
1398
+ "epoch": 32.02,
1399
+ "learning_rate": 1.901901901901902e-06,
1400
+ "loss": 0.5027,
1401
+ "step": 1840
1402
+ },
1403
+ {
1404
+ "epoch": 32.03,
1405
+ "eval_accuracy": 0.6728110599078341,
1406
+ "eval_loss": 0.8439391255378723,
1407
+ "eval_runtime": 168.1229,
1408
+ "eval_samples_per_second": 1.291,
1409
+ "eval_steps_per_second": 0.167,
1410
+ "step": 1848
1411
+ },
1412
+ {
1413
+ "epoch": 33.0,
1414
+ "learning_rate": 1.8518518518518519e-06,
1415
+ "loss": 0.6769,
1416
+ "step": 1850
1417
+ },
1418
+ {
1419
+ "epoch": 33.01,
1420
+ "learning_rate": 1.801801801801802e-06,
1421
+ "loss": 0.565,
1422
+ "step": 1860
1423
+ },
1424
+ {
1425
+ "epoch": 33.01,
1426
+ "learning_rate": 1.7517517517517518e-06,
1427
+ "loss": 0.556,
1428
+ "step": 1870
1429
+ },
1430
+ {
1431
+ "epoch": 33.01,
1432
+ "learning_rate": 1.7017017017017019e-06,
1433
+ "loss": 0.4187,
1434
+ "step": 1880
1435
+ },
1436
+ {
1437
+ "epoch": 33.02,
1438
+ "learning_rate": 1.6516516516516517e-06,
1439
+ "loss": 0.4718,
1440
+ "step": 1890
1441
+ },
1442
+ {
1443
+ "epoch": 33.02,
1444
+ "learning_rate": 1.6016016016016018e-06,
1445
+ "loss": 0.4649,
1446
+ "step": 1900
1447
+ },
1448
+ {
1449
+ "epoch": 33.03,
1450
+ "eval_accuracy": 0.6267281105990783,
1451
+ "eval_loss": 0.9524617791175842,
1452
+ "eval_runtime": 169.6407,
1453
+ "eval_samples_per_second": 1.279,
1454
+ "eval_steps_per_second": 0.165,
1455
+ "step": 1904
1456
+ },
1457
+ {
1458
+ "epoch": 34.0,
1459
+ "learning_rate": 1.5515515515515517e-06,
1460
+ "loss": 0.4505,
1461
+ "step": 1910
1462
+ },
1463
+ {
1464
+ "epoch": 34.01,
1465
+ "learning_rate": 1.5015015015015017e-06,
1466
+ "loss": 0.5412,
1467
+ "step": 1920
1468
+ },
1469
+ {
1470
+ "epoch": 34.01,
1471
+ "learning_rate": 1.4514514514514516e-06,
1472
+ "loss": 0.5931,
1473
+ "step": 1930
1474
+ },
1475
+ {
1476
+ "epoch": 34.02,
1477
+ "learning_rate": 1.4014014014014016e-06,
1478
+ "loss": 0.4183,
1479
+ "step": 1940
1480
+ },
1481
+ {
1482
+ "epoch": 34.02,
1483
+ "learning_rate": 1.3513513513513515e-06,
1484
+ "loss": 0.6046,
1485
+ "step": 1950
1486
+ },
1487
+ {
1488
+ "epoch": 34.03,
1489
+ "learning_rate": 1.3013013013013016e-06,
1490
+ "loss": 0.6625,
1491
+ "step": 1960
1492
+ },
1493
+ {
1494
+ "epoch": 34.03,
1495
+ "eval_accuracy": 0.728110599078341,
1496
+ "eval_loss": 0.7850246429443359,
1497
+ "eval_runtime": 164.4991,
1498
+ "eval_samples_per_second": 1.319,
1499
+ "eval_steps_per_second": 0.17,
1500
+ "step": 1960
1501
+ },
1502
+ {
1503
+ "epoch": 35.0,
1504
+ "learning_rate": 1.2512512512512514e-06,
1505
+ "loss": 0.5967,
1506
+ "step": 1970
1507
+ },
1508
+ {
1509
+ "epoch": 35.01,
1510
+ "learning_rate": 1.2012012012012013e-06,
1511
+ "loss": 0.5503,
1512
+ "step": 1980
1513
+ },
1514
+ {
1515
+ "epoch": 35.01,
1516
+ "learning_rate": 1.1511511511511512e-06,
1517
+ "loss": 0.5159,
1518
+ "step": 1990
1519
+ },
1520
+ {
1521
+ "epoch": 35.02,
1522
+ "learning_rate": 1.1011011011011012e-06,
1523
+ "loss": 0.4377,
1524
+ "step": 2000
1525
+ },
1526
+ {
1527
+ "epoch": 35.02,
1528
+ "learning_rate": 1.051051051051051e-06,
1529
+ "loss": 0.5793,
1530
+ "step": 2010
1531
+ },
1532
+ {
1533
+ "epoch": 35.03,
1534
+ "eval_accuracy": 0.6728110599078341,
1535
+ "eval_loss": 0.8481199741363525,
1536
+ "eval_runtime": 164.3359,
1537
+ "eval_samples_per_second": 1.32,
1538
+ "eval_steps_per_second": 0.17,
1539
+ "step": 2016
1540
+ },
1541
+ {
1542
+ "epoch": 36.0,
1543
+ "learning_rate": 1.0010010010010011e-06,
1544
+ "loss": 0.7009,
1545
+ "step": 2020
1546
+ },
1547
+ {
1548
+ "epoch": 36.01,
1549
+ "learning_rate": 9.50950950950951e-07,
1550
+ "loss": 0.5902,
1551
+ "step": 2030
1552
+ },
1553
+ {
1554
+ "epoch": 36.01,
1555
+ "learning_rate": 9.00900900900901e-07,
1556
+ "loss": 0.5366,
1557
+ "step": 2040
1558
+ },
1559
+ {
1560
+ "epoch": 36.02,
1561
+ "learning_rate": 8.508508508508509e-07,
1562
+ "loss": 0.5119,
1563
+ "step": 2050
1564
+ },
1565
+ {
1566
+ "epoch": 36.02,
1567
+ "learning_rate": 8.008008008008009e-07,
1568
+ "loss": 0.4946,
1569
+ "step": 2060
1570
+ },
1571
+ {
1572
+ "epoch": 36.02,
1573
+ "learning_rate": 7.507507507507509e-07,
1574
+ "loss": 0.6411,
1575
+ "step": 2070
1576
+ },
1577
+ {
1578
+ "epoch": 36.03,
1579
+ "eval_accuracy": 0.6589861751152074,
1580
+ "eval_loss": 0.8842198252677917,
1581
+ "eval_runtime": 164.6218,
1582
+ "eval_samples_per_second": 1.318,
1583
+ "eval_steps_per_second": 0.17,
1584
+ "step": 2072
1585
+ },
1586
+ {
1587
+ "epoch": 37.0,
1588
+ "learning_rate": 7.007007007007008e-07,
1589
+ "loss": 0.5172,
1590
+ "step": 2080
1591
+ },
1592
+ {
1593
+ "epoch": 37.01,
1594
+ "learning_rate": 6.506506506506508e-07,
1595
+ "loss": 0.3986,
1596
+ "step": 2090
1597
+ },
1598
+ {
1599
+ "epoch": 37.01,
1600
+ "learning_rate": 6.006006006006006e-07,
1601
+ "loss": 0.4807,
1602
+ "step": 2100
1603
+ },
1604
+ {
1605
+ "epoch": 37.02,
1606
+ "learning_rate": 5.505505505505506e-07,
1607
+ "loss": 0.4681,
1608
+ "step": 2110
1609
+ },
1610
+ {
1611
+ "epoch": 37.02,
1612
+ "learning_rate": 5.005005005005006e-07,
1613
+ "loss": 0.6592,
1614
+ "step": 2120
1615
+ },
1616
+ {
1617
+ "epoch": 37.03,
1618
+ "eval_accuracy": 0.6912442396313364,
1619
+ "eval_loss": 0.802787184715271,
1620
+ "eval_runtime": 163.9059,
1621
+ "eval_samples_per_second": 1.324,
1622
+ "eval_steps_per_second": 0.171,
1623
+ "step": 2128
1624
+ },
1625
+ {
1626
+ "epoch": 38.0,
1627
+ "learning_rate": 4.504504504504505e-07,
1628
+ "loss": 0.5769,
1629
+ "step": 2130
1630
+ },
1631
+ {
1632
+ "epoch": 38.01,
1633
+ "learning_rate": 4.0040040040040045e-07,
1634
+ "loss": 0.6323,
1635
+ "step": 2140
1636
+ },
1637
+ {
1638
+ "epoch": 38.01,
1639
+ "learning_rate": 3.503503503503504e-07,
1640
+ "loss": 0.5319,
1641
+ "step": 2150
1642
+ },
1643
+ {
1644
+ "epoch": 38.01,
1645
+ "learning_rate": 3.003003003003003e-07,
1646
+ "loss": 0.6306,
1647
+ "step": 2160
1648
+ },
1649
+ {
1650
+ "epoch": 38.02,
1651
+ "learning_rate": 2.502502502502503e-07,
1652
+ "loss": 0.6192,
1653
+ "step": 2170
1654
+ },
1655
+ {
1656
+ "epoch": 38.02,
1657
+ "learning_rate": 2.0020020020020022e-07,
1658
+ "loss": 0.5524,
1659
+ "step": 2180
1660
+ },
1661
+ {
1662
+ "epoch": 38.03,
1663
+ "eval_accuracy": 0.6866359447004609,
1664
+ "eval_loss": 0.821592390537262,
1665
+ "eval_runtime": 165.1926,
1666
+ "eval_samples_per_second": 1.314,
1667
+ "eval_steps_per_second": 0.169,
1668
+ "step": 2184
1669
+ },
1670
+ {
1671
+ "epoch": 39.0,
1672
+ "learning_rate": 1.5015015015015016e-07,
1673
+ "loss": 0.5701,
1674
+ "step": 2190
1675
+ },
1676
+ {
1677
+ "epoch": 39.01,
1678
+ "learning_rate": 1.0010010010010011e-07,
1679
+ "loss": 0.5891,
1680
+ "step": 2200
1681
+ },
1682
+ {
1683
+ "epoch": 39.01,
1684
+ "learning_rate": 5.0050050050050056e-08,
1685
+ "loss": 0.5807,
1686
+ "step": 2210
1687
+ },
1688
+ {
1689
+ "epoch": 39.02,
1690
+ "learning_rate": 0.0,
1691
+ "loss": 0.5697,
1692
+ "step": 2220
1693
+ },
1694
+ {
1695
+ "epoch": 39.02,
1696
+ "eval_accuracy": 0.6774193548387096,
1697
+ "eval_loss": 0.833946943283081,
1698
+ "eval_runtime": 164.2083,
1699
+ "eval_samples_per_second": 1.321,
1700
+ "eval_steps_per_second": 0.171,
1701
+ "step": 2220
1702
+ },
1703
+ {
1704
+ "epoch": 39.02,
1705
+ "step": 2220,
1706
+ "total_flos": 2.208334746599719e+19,
1707
+ "train_loss": 0.8170982454274152,
1708
+ "train_runtime": 23121.8467,
1709
+ "train_samples_per_second": 0.768,
1710
+ "train_steps_per_second": 0.096
1711
+ },
1712
+ {
1713
+ "epoch": 39.02,
1714
+ "eval_accuracy": 0.6342592592592593,
1715
+ "eval_loss": 0.8682467341423035,
1716
+ "eval_runtime": 165.599,
1717
+ "eval_samples_per_second": 1.304,
1718
+ "eval_steps_per_second": 0.163,
1719
+ "step": 2220
1720
+ },
1721
+ {
1722
+ "epoch": 39.02,
1723
+ "eval_accuracy": 0.6342592592592593,
1724
+ "eval_loss": 0.8682467341423035,
1725
+ "eval_runtime": 165.9338,
1726
+ "eval_samples_per_second": 1.302,
1727
+ "eval_steps_per_second": 0.163,
1728
+ "step": 2220
1729
  }
1730
  ],
1731
  "logging_steps": 10,
1732
+ "max_steps": 2220,
1733
  "num_input_tokens_seen": 0,
1734
  "num_train_epochs": 9223372036854775807,
1735
  "save_steps": 500,
1736
+ "total_flos": 2.208334746599719e+19,
1737
  "train_batch_size": 8,
1738
  "trial_name": null,
1739
  "trial_params": null