Joy28 commited on
Commit
289f69a
·
verified ·
1 Parent(s): bb2b621

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -6
  2. test_results.json +6 -6
  3. trainer_state.json +459 -1725
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 49.01,
3
- "eval_accuracy": 0.6805555555555556,
4
- "eval_loss": 0.7703171968460083,
5
- "eval_runtime": 162.7205,
6
- "eval_samples_per_second": 1.327,
7
- "eval_steps_per_second": 0.166
8
  }
 
1
  {
2
+ "epoch": 19.04,
3
+ "eval_accuracy": 0.6666666666666666,
4
+ "eval_loss": 0.6926783919334412,
5
+ "eval_runtime": 163.2396,
6
+ "eval_samples_per_second": 1.323,
7
+ "eval_steps_per_second": 0.165
8
  }
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 49.01,
3
- "eval_accuracy": 0.6805555555555556,
4
- "eval_loss": 0.7703171968460083,
5
- "eval_runtime": 162.7205,
6
- "eval_samples_per_second": 1.327,
7
- "eval_steps_per_second": 0.166
8
  }
 
1
  {
2
+ "epoch": 19.04,
3
+ "eval_accuracy": 0.6666666666666666,
4
+ "eval_loss": 0.6926783919334412,
5
+ "eval_runtime": 163.2396,
6
+ "eval_samples_per_second": 1.323,
7
+ "eval_steps_per_second": 0.165
8
  }
trainer_state.json CHANGED
@@ -1,2159 +1,893 @@
1
  {
2
- "best_metric": 0.6912442396313364,
3
  "best_model_checkpoint": "videomae-base-finetuned-subset-check10/checkpoint-896",
4
- "epoch": 49.01117117117117,
5
  "eval_steps": 500,
6
- "global_step": 2775,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0,
13
- "learning_rate": 3.5971223021582736e-07,
14
- "loss": 1.6473,
15
  "step": 10
16
  },
17
  {
18
- "epoch": 0.01,
19
- "learning_rate": 7.194244604316547e-07,
20
- "loss": 1.6938,
21
  "step": 20
22
  },
23
  {
24
- "epoch": 0.01,
25
- "learning_rate": 1.079136690647482e-06,
26
- "loss": 1.6967,
27
  "step": 30
28
  },
29
  {
30
- "epoch": 0.01,
31
- "learning_rate": 1.4388489208633094e-06,
32
- "loss": 1.6515,
33
  "step": 40
34
  },
35
  {
36
- "epoch": 0.02,
37
- "learning_rate": 1.7985611510791368e-06,
38
- "loss": 1.6256,
39
  "step": 50
40
  },
41
  {
42
- "epoch": 0.02,
43
- "eval_accuracy": 0.1382488479262673,
44
- "eval_loss": 1.6078100204467773,
45
- "eval_runtime": 169.5226,
46
- "eval_samples_per_second": 1.28,
47
  "eval_steps_per_second": 0.165,
48
  "step": 56
49
  },
50
  {
51
  "epoch": 1.0,
52
- "learning_rate": 2.158273381294964e-06,
53
- "loss": 1.6524,
54
  "step": 60
55
  },
56
  {
57
  "epoch": 1.01,
58
- "learning_rate": 2.5179856115107916e-06,
59
- "loss": 1.662,
60
  "step": 70
61
  },
62
  {
63
- "epoch": 1.01,
64
- "learning_rate": 2.877697841726619e-06,
65
- "loss": 1.6402,
66
  "step": 80
67
  },
68
  {
69
- "epoch": 1.01,
70
- "learning_rate": 3.237410071942446e-06,
71
- "loss": 1.6405,
72
  "step": 90
73
  },
74
  {
75
- "epoch": 1.02,
76
- "learning_rate": 3.5971223021582737e-06,
77
- "loss": 1.6123,
78
  "step": 100
79
  },
80
  {
81
- "epoch": 1.02,
82
- "learning_rate": 3.956834532374101e-06,
83
- "loss": 1.6131,
84
  "step": 110
85
  },
86
  {
87
- "epoch": 1.02,
88
- "eval_accuracy": 0.17972350230414746,
89
- "eval_loss": 1.5918368101119995,
90
- "eval_runtime": 164.3238,
91
- "eval_samples_per_second": 1.321,
92
- "eval_steps_per_second": 0.17,
93
  "step": 112
94
  },
95
  {
96
- "epoch": 2.0,
97
- "learning_rate": 4.316546762589928e-06,
98
- "loss": 1.6037,
99
  "step": 120
100
  },
101
  {
102
- "epoch": 2.01,
103
- "learning_rate": 4.676258992805755e-06,
104
- "loss": 1.618,
105
  "step": 130
106
  },
107
  {
108
- "epoch": 2.01,
109
- "learning_rate": 5.035971223021583e-06,
110
- "loss": 1.6164,
111
  "step": 140
112
  },
113
  {
114
- "epoch": 2.01,
115
- "learning_rate": 5.3956834532374105e-06,
116
- "loss": 1.5948,
117
  "step": 150
118
  },
119
  {
120
- "epoch": 2.02,
121
- "learning_rate": 5.755395683453238e-06,
122
- "loss": 1.5876,
123
  "step": 160
124
  },
125
  {
126
- "epoch": 2.02,
127
- "eval_accuracy": 0.30414746543778803,
128
- "eval_loss": 1.5944809913635254,
129
- "eval_runtime": 166.4449,
130
- "eval_samples_per_second": 1.304,
131
- "eval_steps_per_second": 0.168,
132
  "step": 168
133
  },
134
  {
135
  "epoch": 3.0,
136
- "learning_rate": 6.115107913669065e-06,
137
- "loss": 1.649,
138
  "step": 170
139
  },
140
  {
141
- "epoch": 3.0,
142
- "learning_rate": 6.474820143884892e-06,
143
- "loss": 1.6068,
144
  "step": 180
145
  },
146
  {
147
- "epoch": 3.01,
148
- "learning_rate": 6.834532374100719e-06,
149
- "loss": 1.5767,
150
  "step": 190
151
  },
152
  {
153
- "epoch": 3.01,
154
- "learning_rate": 7.194244604316547e-06,
155
- "loss": 1.6003,
156
  "step": 200
157
  },
158
  {
159
- "epoch": 3.02,
160
- "learning_rate": 7.5539568345323745e-06,
161
- "loss": 1.5721,
162
  "step": 210
163
  },
164
  {
165
- "epoch": 3.02,
166
- "learning_rate": 7.913669064748202e-06,
167
- "loss": 1.6136,
168
  "step": 220
169
  },
170
  {
171
- "epoch": 3.02,
172
- "eval_accuracy": 0.4608294930875576,
173
- "eval_loss": 1.5251535177230835,
174
- "eval_runtime": 163.7672,
175
- "eval_samples_per_second": 1.325,
176
- "eval_steps_per_second": 0.171,
177
  "step": 224
178
  },
179
  {
180
- "epoch": 4.0,
181
- "learning_rate": 8.273381294964029e-06,
182
- "loss": 1.6221,
183
  "step": 230
184
  },
185
  {
186
  "epoch": 4.01,
187
- "learning_rate": 8.633093525179856e-06,
188
- "loss": 1.5636,
189
  "step": 240
190
  },
191
  {
192
- "epoch": 4.01,
193
- "learning_rate": 8.992805755395683e-06,
194
- "loss": 1.5337,
195
  "step": 250
196
  },
197
  {
198
- "epoch": 4.01,
199
- "learning_rate": 9.35251798561151e-06,
200
- "loss": 1.562,
201
  "step": 260
202
  },
203
  {
204
- "epoch": 4.02,
205
- "learning_rate": 9.712230215827338e-06,
206
- "loss": 1.5213,
207
  "step": 270
208
  },
209
  {
210
- "epoch": 4.02,
211
- "learning_rate": 9.99199038846616e-06,
212
- "loss": 1.5186,
213
  "step": 280
214
  },
215
  {
216
- "epoch": 4.02,
217
- "eval_accuracy": 0.4377880184331797,
218
- "eval_loss": 1.4925481081008911,
219
- "eval_runtime": 165.4099,
220
- "eval_samples_per_second": 1.312,
221
- "eval_steps_per_second": 0.169,
222
  "step": 280
223
  },
224
  {
225
- "epoch": 5.0,
226
- "learning_rate": 9.951942330796958e-06,
227
- "loss": 1.4726,
228
  "step": 290
229
  },
230
  {
231
- "epoch": 5.01,
232
- "learning_rate": 9.911894273127755e-06,
233
- "loss": 1.4936,
234
  "step": 300
235
  },
236
  {
237
- "epoch": 5.01,
238
- "learning_rate": 9.871846215458551e-06,
239
- "loss": 1.4841,
240
  "step": 310
241
  },
242
  {
243
- "epoch": 5.01,
244
- "learning_rate": 9.831798157789349e-06,
245
- "loss": 1.3298,
246
  "step": 320
247
  },
248
  {
249
- "epoch": 5.02,
250
- "learning_rate": 9.791750100120145e-06,
251
- "loss": 1.3534,
252
  "step": 330
253
  },
254
  {
255
- "epoch": 5.02,
256
- "eval_accuracy": 0.5299539170506913,
257
- "eval_loss": 1.24056875705719,
258
- "eval_runtime": 164.7127,
259
- "eval_samples_per_second": 1.317,
260
- "eval_steps_per_second": 0.17,
261
  "step": 336
262
  },
263
  {
264
  "epoch": 6.0,
265
- "learning_rate": 9.751702042450942e-06,
266
- "loss": 1.4524,
267
  "step": 340
268
  },
269
  {
270
  "epoch": 6.01,
271
- "learning_rate": 9.71165398478174e-06,
272
- "loss": 1.294,
273
  "step": 350
274
  },
275
  {
276
- "epoch": 6.01,
277
- "learning_rate": 9.671605927112536e-06,
278
- "loss": 1.3003,
279
  "step": 360
280
  },
281
  {
282
- "epoch": 6.01,
283
- "learning_rate": 9.631557869443333e-06,
284
- "loss": 1.195,
285
  "step": 370
286
  },
287
  {
288
- "epoch": 6.02,
289
- "learning_rate": 9.59150981177413e-06,
290
- "loss": 1.137,
291
  "step": 380
292
  },
293
  {
294
- "epoch": 6.02,
295
- "learning_rate": 9.551461754104927e-06,
296
- "loss": 1.2166,
297
  "step": 390
298
  },
299
  {
300
- "epoch": 6.02,
301
- "eval_accuracy": 0.6082949308755761,
302
- "eval_loss": 1.1090563535690308,
303
- "eval_runtime": 164.323,
304
- "eval_samples_per_second": 1.321,
305
- "eval_steps_per_second": 0.17,
306
  "step": 392
307
  },
308
  {
309
- "epoch": 7.0,
310
- "learning_rate": 9.511413696435725e-06,
311
- "loss": 1.1719,
312
  "step": 400
313
  },
314
  {
315
- "epoch": 7.01,
316
- "learning_rate": 9.47136563876652e-06,
317
- "loss": 1.1407,
318
  "step": 410
319
  },
320
  {
321
- "epoch": 7.01,
322
- "learning_rate": 9.431317581097318e-06,
323
- "loss": 1.1204,
324
  "step": 420
325
  },
326
  {
327
- "epoch": 7.01,
328
- "learning_rate": 9.391269523428114e-06,
329
- "loss": 1.1748,
330
  "step": 430
331
  },
332
  {
333
- "epoch": 7.02,
334
- "learning_rate": 9.351221465758912e-06,
335
- "loss": 1.1799,
336
  "step": 440
337
  },
338
  {
339
- "epoch": 7.02,
340
- "eval_accuracy": 0.5253456221198156,
341
- "eval_loss": 1.1851890087127686,
342
- "eval_runtime": 165.7908,
343
- "eval_samples_per_second": 1.309,
344
- "eval_steps_per_second": 0.169,
345
  "step": 448
346
  },
347
  {
348
  "epoch": 8.0,
349
- "learning_rate": 9.31117340808971e-06,
350
- "loss": 1.2371,
351
  "step": 450
352
  },
353
  {
354
- "epoch": 8.0,
355
- "learning_rate": 9.271125350420505e-06,
356
- "loss": 1.0314,
357
  "step": 460
358
  },
359
  {
360
- "epoch": 8.01,
361
- "learning_rate": 9.231077292751303e-06,
362
- "loss": 1.138,
363
  "step": 470
364
  },
365
  {
366
- "epoch": 8.01,
367
- "learning_rate": 9.191029235082099e-06,
368
- "loss": 1.219,
369
  "step": 480
370
  },
371
  {
372
- "epoch": 8.02,
373
- "learning_rate": 9.150981177412896e-06,
374
- "loss": 1.0251,
375
  "step": 490
376
  },
377
  {
378
- "epoch": 8.02,
379
- "learning_rate": 9.110933119743694e-06,
380
- "loss": 1.0207,
381
  "step": 500
382
  },
383
  {
384
- "epoch": 8.02,
385
- "eval_accuracy": 0.5529953917050692,
386
- "eval_loss": 1.1371861696243286,
387
- "eval_runtime": 163.5913,
388
- "eval_samples_per_second": 1.326,
389
- "eval_steps_per_second": 0.171,
390
  "step": 504
391
  },
392
  {
393
- "epoch": 9.0,
394
- "learning_rate": 9.07088506207449e-06,
395
- "loss": 0.9883,
396
  "step": 510
397
  },
398
  {
399
  "epoch": 9.01,
400
- "learning_rate": 9.030837004405287e-06,
401
- "loss": 1.2037,
402
  "step": 520
403
  },
404
  {
405
- "epoch": 9.01,
406
- "learning_rate": 8.990788946736085e-06,
407
- "loss": 1.0463,
408
  "step": 530
409
  },
410
  {
411
- "epoch": 9.01,
412
- "learning_rate": 8.950740889066881e-06,
413
- "loss": 1.0991,
414
  "step": 540
415
  },
416
  {
417
- "epoch": 9.02,
418
- "learning_rate": 8.910692831397679e-06,
419
- "loss": 1.006,
420
  "step": 550
421
  },
422
  {
423
- "epoch": 9.02,
424
- "learning_rate": 8.870644773728475e-06,
425
- "loss": 1.131,
426
  "step": 560
427
  },
428
  {
429
- "epoch": 9.02,
430
- "eval_accuracy": 0.4792626728110599,
431
- "eval_loss": 1.2173593044281006,
432
- "eval_runtime": 166.496,
433
- "eval_samples_per_second": 1.303,
434
- "eval_steps_per_second": 0.168,
435
  "step": 560
436
  },
437
  {
438
- "epoch": 10.0,
439
- "learning_rate": 8.830596716059272e-06,
440
- "loss": 1.0511,
441
  "step": 570
442
  },
443
  {
444
- "epoch": 10.01,
445
- "learning_rate": 8.79054865839007e-06,
446
- "loss": 1.1173,
447
  "step": 580
448
  },
449
  {
450
- "epoch": 10.01,
451
- "learning_rate": 8.750500600720866e-06,
452
- "loss": 1.0267,
453
  "step": 590
454
  },
455
  {
456
- "epoch": 10.01,
457
- "learning_rate": 8.710452543051663e-06,
458
- "loss": 0.9008,
459
  "step": 600
460
  },
461
  {
462
- "epoch": 10.02,
463
- "learning_rate": 8.67040448538246e-06,
464
- "loss": 0.9063,
465
  "step": 610
466
  },
467
  {
468
- "epoch": 10.02,
469
- "eval_accuracy": 0.5714285714285714,
470
- "eval_loss": 1.055660605430603,
471
- "eval_runtime": 164.406,
472
- "eval_samples_per_second": 1.32,
473
- "eval_steps_per_second": 0.17,
474
  "step": 616
475
  },
476
  {
477
  "epoch": 11.0,
478
- "learning_rate": 8.630356427713257e-06,
479
- "loss": 0.9433,
480
  "step": 620
481
  },
482
  {
483
  "epoch": 11.01,
484
- "learning_rate": 8.590308370044054e-06,
485
- "loss": 0.9096,
486
  "step": 630
487
  },
488
  {
489
- "epoch": 11.01,
490
- "learning_rate": 8.55026031237485e-06,
491
- "loss": 0.9989,
492
  "step": 640
493
  },
494
  {
495
- "epoch": 11.01,
496
- "learning_rate": 8.510212254705648e-06,
497
- "loss": 0.856,
498
  "step": 650
499
  },
500
  {
501
- "epoch": 11.02,
502
- "learning_rate": 8.470164197036444e-06,
503
- "loss": 0.9063,
504
  "step": 660
505
  },
506
  {
507
- "epoch": 11.02,
508
- "learning_rate": 8.430116139367241e-06,
509
- "loss": 1.0308,
510
  "step": 670
511
  },
512
  {
513
- "epoch": 11.02,
514
- "eval_accuracy": 0.5944700460829493,
515
- "eval_loss": 1.055904746055603,
516
- "eval_runtime": 163.9813,
517
- "eval_samples_per_second": 1.323,
518
- "eval_steps_per_second": 0.171,
519
  "step": 672
520
  },
521
  {
522
- "epoch": 12.0,
523
- "learning_rate": 8.390068081698039e-06,
524
- "loss": 0.9958,
525
  "step": 680
526
  },
527
  {
528
- "epoch": 12.01,
529
- "learning_rate": 8.350020024028835e-06,
530
- "loss": 0.9275,
531
  "step": 690
532
  },
533
  {
534
- "epoch": 12.01,
535
- "learning_rate": 8.309971966359633e-06,
536
- "loss": 0.9112,
537
  "step": 700
538
  },
539
  {
540
- "epoch": 12.01,
541
- "learning_rate": 8.269923908690429e-06,
542
- "loss": 0.8474,
543
  "step": 710
544
  },
545
  {
546
- "epoch": 12.02,
547
- "learning_rate": 8.229875851021226e-06,
548
- "loss": 0.8939,
549
  "step": 720
550
  },
551
  {
552
- "epoch": 12.02,
553
- "eval_accuracy": 0.47465437788018433,
554
- "eval_loss": 1.265906810760498,
555
- "eval_runtime": 165.9908,
556
- "eval_samples_per_second": 1.307,
557
- "eval_steps_per_second": 0.169,
558
  "step": 728
559
  },
560
  {
561
  "epoch": 13.0,
562
- "learning_rate": 8.189827793352024e-06,
563
- "loss": 0.9501,
564
  "step": 730
565
  },
566
  {
567
- "epoch": 13.0,
568
- "learning_rate": 8.14977973568282e-06,
569
- "loss": 0.8779,
570
  "step": 740
571
  },
572
  {
573
- "epoch": 13.01,
574
- "learning_rate": 8.109731678013617e-06,
575
- "loss": 0.9526,
576
  "step": 750
577
  },
578
  {
579
- "epoch": 13.01,
580
- "learning_rate": 8.069683620344413e-06,
581
- "loss": 0.9557,
582
  "step": 760
583
  },
584
  {
585
- "epoch": 13.02,
586
- "learning_rate": 8.02963556267521e-06,
587
- "loss": 0.8857,
588
  "step": 770
589
  },
590
  {
591
- "epoch": 13.02,
592
- "learning_rate": 7.989587505006008e-06,
593
- "loss": 0.7683,
594
  "step": 780
595
  },
596
  {
597
- "epoch": 13.02,
598
- "eval_accuracy": 0.5990783410138248,
599
- "eval_loss": 0.9757941365242004,
600
- "eval_runtime": 164.1944,
601
  "eval_samples_per_second": 1.322,
602
  "eval_steps_per_second": 0.171,
603
  "step": 784
604
  },
605
  {
606
- "epoch": 14.0,
607
- "learning_rate": 7.949539447336804e-06,
608
- "loss": 0.944,
609
  "step": 790
610
  },
611
  {
612
  "epoch": 14.01,
613
- "learning_rate": 7.909491389667602e-06,
614
- "loss": 0.8302,
615
  "step": 800
616
  },
617
  {
618
- "epoch": 14.01,
619
- "learning_rate": 7.869443331998398e-06,
620
- "loss": 0.8932,
621
  "step": 810
622
  },
623
  {
624
- "epoch": 14.01,
625
- "learning_rate": 7.829395274329196e-06,
626
- "loss": 0.7827,
627
  "step": 820
628
  },
629
  {
630
- "epoch": 14.02,
631
- "learning_rate": 7.789347216659993e-06,
632
- "loss": 0.8574,
633
  "step": 830
634
  },
635
  {
636
- "epoch": 14.02,
637
- "learning_rate": 7.749299158990789e-06,
638
- "loss": 0.8586,
639
  "step": 840
640
  },
641
  {
642
- "epoch": 14.02,
643
- "eval_accuracy": 0.6728110599078341,
644
- "eval_loss": 0.8495957851409912,
645
- "eval_runtime": 165.1894,
646
- "eval_samples_per_second": 1.314,
647
- "eval_steps_per_second": 0.17,
648
  "step": 840
649
  },
650
  {
651
- "epoch": 15.0,
652
- "learning_rate": 7.709251101321587e-06,
653
- "loss": 0.8769,
654
  "step": 850
655
  },
656
  {
657
- "epoch": 15.01,
658
- "learning_rate": 7.669203043652384e-06,
659
- "loss": 0.8732,
660
  "step": 860
661
  },
662
  {
663
- "epoch": 15.01,
664
- "learning_rate": 7.62915498598318e-06,
665
- "loss": 0.8314,
666
  "step": 870
667
  },
668
  {
669
- "epoch": 15.01,
670
- "learning_rate": 7.589106928313977e-06,
671
- "loss": 0.7307,
672
  "step": 880
673
  },
674
  {
675
- "epoch": 15.02,
676
- "learning_rate": 7.5490588706447746e-06,
677
- "loss": 0.7655,
678
  "step": 890
679
  },
680
  {
681
- "epoch": 15.02,
682
- "eval_accuracy": 0.6912442396313364,
683
- "eval_loss": 0.8554534316062927,
684
- "eval_runtime": 166.6109,
685
- "eval_samples_per_second": 1.302,
686
- "eval_steps_per_second": 0.168,
687
  "step": 896
688
  },
689
  {
690
  "epoch": 16.0,
691
- "learning_rate": 7.509010812975571e-06,
692
- "loss": 0.7943,
693
  "step": 900
694
  },
695
  {
696
  "epoch": 16.01,
697
- "learning_rate": 7.468962755306368e-06,
698
- "loss": 0.7588,
699
  "step": 910
700
  },
701
  {
702
- "epoch": 16.01,
703
- "learning_rate": 7.428914697637165e-06,
704
- "loss": 0.9166,
705
  "step": 920
706
  },
707
  {
708
- "epoch": 16.01,
709
- "learning_rate": 7.3888666399679625e-06,
710
- "loss": 0.8153,
711
  "step": 930
712
  },
713
  {
714
- "epoch": 16.02,
715
- "learning_rate": 7.348818582298759e-06,
716
- "loss": 0.8269,
717
  "step": 940
718
  },
719
  {
720
- "epoch": 16.02,
721
- "learning_rate": 7.308770524629556e-06,
722
- "loss": 0.622,
723
  "step": 950
724
  },
725
  {
726
- "epoch": 16.02,
727
- "eval_accuracy": 0.5207373271889401,
728
- "eval_loss": 1.2835302352905273,
729
- "eval_runtime": 164.5709,
730
- "eval_samples_per_second": 1.319,
731
- "eval_steps_per_second": 0.17,
732
  "step": 952
733
  },
734
  {
735
- "epoch": 17.0,
736
- "learning_rate": 7.268722466960353e-06,
737
- "loss": 0.8316,
738
  "step": 960
739
  },
740
  {
741
- "epoch": 17.01,
742
- "learning_rate": 7.2286744092911495e-06,
743
- "loss": 0.7093,
744
  "step": 970
745
  },
746
  {
747
- "epoch": 17.01,
748
- "learning_rate": 7.188626351621947e-06,
749
- "loss": 0.6915,
750
  "step": 980
751
  },
752
  {
753
- "epoch": 17.01,
754
- "learning_rate": 7.148578293952744e-06,
755
- "loss": 0.7887,
756
  "step": 990
757
  },
758
  {
759
- "epoch": 17.02,
760
- "learning_rate": 7.108530236283541e-06,
761
- "loss": 0.878,
762
  "step": 1000
763
  },
764
  {
765
- "epoch": 17.02,
766
- "eval_accuracy": 0.6912442396313364,
767
- "eval_loss": 0.7960665225982666,
768
- "eval_runtime": 163.5823,
769
- "eval_samples_per_second": 1.327,
770
- "eval_steps_per_second": 0.171,
771
  "step": 1008
772
  },
773
  {
774
  "epoch": 18.0,
775
- "learning_rate": 7.0684821786143374e-06,
776
- "loss": 0.8681,
777
  "step": 1010
778
  },
779
  {
780
- "epoch": 18.0,
781
- "learning_rate": 7.028434120945134e-06,
782
- "loss": 0.9578,
783
  "step": 1020
784
  },
785
  {
786
- "epoch": 18.01,
787
- "learning_rate": 6.988386063275932e-06,
788
- "loss": 0.7911,
789
  "step": 1030
790
  },
791
  {
792
- "epoch": 18.01,
793
- "learning_rate": 6.948338005606729e-06,
794
- "loss": 0.9059,
795
  "step": 1040
796
  },
797
  {
798
- "epoch": 18.02,
799
- "learning_rate": 6.908289947937525e-06,
800
- "loss": 0.7579,
801
  "step": 1050
802
  },
803
  {
804
- "epoch": 18.02,
805
- "learning_rate": 6.868241890268322e-06,
806
- "loss": 0.5976,
807
  "step": 1060
808
  },
809
  {
810
- "epoch": 18.02,
811
- "eval_accuracy": 0.6405529953917051,
812
- "eval_loss": 0.9144326448440552,
813
- "eval_runtime": 165.1317,
814
- "eval_samples_per_second": 1.314,
815
- "eval_steps_per_second": 0.17,
816
  "step": 1064
817
  },
818
  {
819
- "epoch": 19.0,
820
- "learning_rate": 6.828193832599119e-06,
821
- "loss": 0.6986,
822
  "step": 1070
823
  },
824
  {
825
  "epoch": 19.01,
826
- "learning_rate": 6.7881457749299165e-06,
827
- "loss": 0.9337,
828
  "step": 1080
829
  },
830
  {
831
- "epoch": 19.01,
832
- "learning_rate": 6.748097717260713e-06,
833
- "loss": 0.8697,
834
  "step": 1090
835
  },
836
  {
837
- "epoch": 19.01,
838
- "learning_rate": 6.70804965959151e-06,
839
- "loss": 0.6017,
840
  "step": 1100
841
  },
842
  {
843
- "epoch": 19.02,
844
- "learning_rate": 6.668001601922307e-06,
845
- "loss": 0.7251,
846
  "step": 1110
847
  },
848
  {
849
- "epoch": 19.02,
850
- "learning_rate": 6.627953544253104e-06,
851
- "loss": 0.7254,
852
- "step": 1120
853
- },
854
- {
855
- "epoch": 19.02,
856
- "eval_accuracy": 0.6175115207373272,
857
- "eval_loss": 0.9288573861122131,
858
- "eval_runtime": 163.2915,
859
- "eval_samples_per_second": 1.329,
860
- "eval_steps_per_second": 0.171,
861
- "step": 1120
862
- },
863
- {
864
- "epoch": 20.0,
865
- "learning_rate": 6.587905486583901e-06,
866
- "loss": 0.7905,
867
- "step": 1130
868
- },
869
- {
870
- "epoch": 20.01,
871
- "learning_rate": 6.547857428914698e-06,
872
- "loss": 0.7592,
873
- "step": 1140
874
- },
875
- {
876
- "epoch": 20.01,
877
- "learning_rate": 6.507809371245495e-06,
878
- "loss": 0.7639,
879
- "step": 1150
880
- },
881
- {
882
- "epoch": 20.01,
883
- "learning_rate": 6.4677613135762915e-06,
884
- "loss": 0.7202,
885
- "step": 1160
886
- },
887
- {
888
- "epoch": 20.02,
889
- "learning_rate": 6.427713255907089e-06,
890
- "loss": 0.7443,
891
- "step": 1170
892
- },
893
- {
894
- "epoch": 20.02,
895
- "eval_accuracy": 0.511520737327189,
896
- "eval_loss": 1.2425593137741089,
897
- "eval_runtime": 165.1561,
898
- "eval_samples_per_second": 1.314,
899
- "eval_steps_per_second": 0.17,
900
- "step": 1176
901
- },
902
- {
903
- "epoch": 21.0,
904
- "learning_rate": 6.387665198237886e-06,
905
- "loss": 0.7448,
906
- "step": 1180
907
- },
908
- {
909
- "epoch": 21.01,
910
- "learning_rate": 6.347617140568683e-06,
911
- "loss": 0.6349,
912
- "step": 1190
913
- },
914
- {
915
- "epoch": 21.01,
916
- "learning_rate": 6.307569082899479e-06,
917
- "loss": 0.672,
918
- "step": 1200
919
- },
920
- {
921
- "epoch": 21.01,
922
- "learning_rate": 6.267521025230276e-06,
923
- "loss": 0.5567,
924
- "step": 1210
925
- },
926
- {
927
- "epoch": 21.02,
928
- "learning_rate": 6.227472967561074e-06,
929
- "loss": 0.809,
930
- "step": 1220
931
  },
932
  {
933
- "epoch": 21.02,
934
- "learning_rate": 6.1874249098918705e-06,
935
- "loss": 0.5463,
936
- "step": 1230
 
 
 
937
  },
938
  {
939
- "epoch": 21.02,
940
- "eval_accuracy": 0.5529953917050692,
941
- "eval_loss": 1.1711310148239136,
942
- "eval_runtime": 165.4332,
943
  "eval_samples_per_second": 1.312,
944
- "eval_steps_per_second": 0.169,
945
- "step": 1232
946
- },
947
- {
948
- "epoch": 22.0,
949
- "learning_rate": 6.147376852222667e-06,
950
- "loss": 0.5671,
951
- "step": 1240
952
- },
953
- {
954
- "epoch": 22.01,
955
- "learning_rate": 6.107328794553464e-06,
956
- "loss": 0.6794,
957
- "step": 1250
958
- },
959
- {
960
- "epoch": 22.01,
961
- "learning_rate": 6.067280736884261e-06,
962
- "loss": 0.7263,
963
- "step": 1260
964
- },
965
- {
966
- "epoch": 22.01,
967
- "learning_rate": 6.027232679215058e-06,
968
- "loss": 0.6543,
969
- "step": 1270
970
- },
971
- {
972
- "epoch": 22.02,
973
- "learning_rate": 5.987184621545855e-06,
974
- "loss": 0.7503,
975
- "step": 1280
976
- },
977
- {
978
- "epoch": 22.02,
979
- "eval_accuracy": 0.4792626728110599,
980
- "eval_loss": 1.4511420726776123,
981
- "eval_runtime": 163.4694,
982
- "eval_samples_per_second": 1.327,
983
- "eval_steps_per_second": 0.171,
984
- "step": 1288
985
- },
986
- {
987
- "epoch": 23.0,
988
- "learning_rate": 5.947136563876652e-06,
989
- "loss": 0.7247,
990
- "step": 1290
991
- },
992
- {
993
- "epoch": 23.0,
994
- "learning_rate": 5.907088506207449e-06,
995
- "loss": 0.6522,
996
- "step": 1300
997
- },
998
- {
999
- "epoch": 23.01,
1000
- "learning_rate": 5.867040448538246e-06,
1001
- "loss": 0.6363,
1002
- "step": 1310
1003
- },
1004
- {
1005
- "epoch": 23.01,
1006
- "learning_rate": 5.826992390869043e-06,
1007
- "loss": 0.7127,
1008
- "step": 1320
1009
- },
1010
- {
1011
- "epoch": 23.02,
1012
- "learning_rate": 5.78694433319984e-06,
1013
- "loss": 0.5885,
1014
- "step": 1330
1015
- },
1016
- {
1017
- "epoch": 23.02,
1018
- "learning_rate": 5.746896275530637e-06,
1019
- "loss": 0.6529,
1020
- "step": 1340
1021
- },
1022
- {
1023
- "epoch": 23.02,
1024
- "eval_accuracy": 0.6866359447004609,
1025
- "eval_loss": 0.9009619951248169,
1026
- "eval_runtime": 171.0268,
1027
- "eval_samples_per_second": 1.269,
1028
  "eval_steps_per_second": 0.164,
1029
- "step": 1344
1030
- },
1031
- {
1032
- "epoch": 24.0,
1033
- "learning_rate": 5.706848217861433e-06,
1034
- "loss": 0.5402,
1035
- "step": 1350
1036
- },
1037
- {
1038
- "epoch": 24.01,
1039
- "learning_rate": 5.666800160192231e-06,
1040
- "loss": 0.6978,
1041
- "step": 1360
1042
- },
1043
- {
1044
- "epoch": 24.01,
1045
- "learning_rate": 5.626752102523028e-06,
1046
- "loss": 0.6435,
1047
- "step": 1370
1048
- },
1049
- {
1050
- "epoch": 24.01,
1051
- "learning_rate": 5.5867040448538245e-06,
1052
- "loss": 0.6665,
1053
- "step": 1380
1054
- },
1055
- {
1056
- "epoch": 24.02,
1057
- "learning_rate": 5.546655987184621e-06,
1058
- "loss": 0.805,
1059
- "step": 1390
1060
- },
1061
- {
1062
- "epoch": 24.02,
1063
- "learning_rate": 5.506607929515418e-06,
1064
- "loss": 0.6001,
1065
- "step": 1400
1066
- },
1067
- {
1068
- "epoch": 24.02,
1069
- "eval_accuracy": 0.6129032258064516,
1070
- "eval_loss": 0.9717797040939331,
1071
- "eval_runtime": 163.6301,
1072
- "eval_samples_per_second": 1.326,
1073
- "eval_steps_per_second": 0.171,
1074
- "step": 1400
1075
- },
1076
- {
1077
- "epoch": 25.0,
1078
- "learning_rate": 5.466559871846216e-06,
1079
- "loss": 0.6452,
1080
- "step": 1410
1081
  },
1082
  {
1083
- "epoch": 25.01,
1084
- "learning_rate": 5.4265118141770124e-06,
1085
- "loss": 0.5866,
1086
- "step": 1420
1087
- },
1088
- {
1089
- "epoch": 25.01,
1090
- "learning_rate": 5.386463756507809e-06,
1091
- "loss": 0.7066,
1092
- "step": 1430
1093
- },
1094
- {
1095
- "epoch": 25.01,
1096
- "learning_rate": 5.346415698838606e-06,
1097
- "loss": 0.543,
1098
- "step": 1440
1099
- },
1100
- {
1101
- "epoch": 25.02,
1102
- "learning_rate": 5.306367641169404e-06,
1103
- "loss": 0.55,
1104
- "step": 1450
1105
- },
1106
- {
1107
- "epoch": 25.02,
1108
- "eval_accuracy": 0.4377880184331797,
1109
- "eval_loss": 1.3723992109298706,
1110
- "eval_runtime": 164.6177,
1111
- "eval_samples_per_second": 1.318,
1112
- "eval_steps_per_second": 0.17,
1113
- "step": 1456
1114
- },
1115
- {
1116
- "epoch": 26.0,
1117
- "learning_rate": 5.2663195835002e-06,
1118
- "loss": 0.6087,
1119
- "step": 1460
1120
- },
1121
- {
1122
- "epoch": 26.01,
1123
- "learning_rate": 5.226271525830998e-06,
1124
- "loss": 0.6169,
1125
- "step": 1470
1126
- },
1127
- {
1128
- "epoch": 26.01,
1129
- "learning_rate": 5.186223468161795e-06,
1130
- "loss": 0.9426,
1131
- "step": 1480
1132
- },
1133
- {
1134
- "epoch": 26.01,
1135
- "learning_rate": 5.146175410492592e-06,
1136
- "loss": 0.4587,
1137
- "step": 1490
1138
- },
1139
- {
1140
- "epoch": 26.02,
1141
- "learning_rate": 5.106127352823389e-06,
1142
- "loss": 0.6725,
1143
- "step": 1500
1144
- },
1145
- {
1146
- "epoch": 26.02,
1147
- "learning_rate": 5.066079295154186e-06,
1148
- "loss": 0.7328,
1149
- "step": 1510
1150
- },
1151
- {
1152
- "epoch": 26.02,
1153
- "eval_accuracy": 0.4930875576036866,
1154
- "eval_loss": 1.2508519887924194,
1155
- "eval_runtime": 165.1191,
1156
- "eval_samples_per_second": 1.314,
1157
- "eval_steps_per_second": 0.17,
1158
- "step": 1512
1159
- },
1160
- {
1161
- "epoch": 27.0,
1162
- "learning_rate": 5.026031237484983e-06,
1163
- "loss": 0.4386,
1164
- "step": 1520
1165
- },
1166
- {
1167
- "epoch": 27.01,
1168
- "learning_rate": 4.985983179815779e-06,
1169
- "loss": 0.5087,
1170
- "step": 1530
1171
- },
1172
- {
1173
- "epoch": 27.01,
1174
- "learning_rate": 4.945935122146576e-06,
1175
- "loss": 0.8035,
1176
- "step": 1540
1177
- },
1178
- {
1179
- "epoch": 27.01,
1180
- "learning_rate": 4.905887064477373e-06,
1181
- "loss": 0.6609,
1182
- "step": 1550
1183
- },
1184
- {
1185
- "epoch": 27.02,
1186
- "learning_rate": 4.86583900680817e-06,
1187
- "loss": 0.5754,
1188
- "step": 1560
1189
- },
1190
- {
1191
- "epoch": 27.02,
1192
- "eval_accuracy": 0.663594470046083,
1193
- "eval_loss": 0.9112865924835205,
1194
- "eval_runtime": 163.6522,
1195
- "eval_samples_per_second": 1.326,
1196
- "eval_steps_per_second": 0.171,
1197
- "step": 1568
1198
- },
1199
- {
1200
- "epoch": 28.0,
1201
- "learning_rate": 4.825790949138967e-06,
1202
- "loss": 0.7097,
1203
- "step": 1570
1204
- },
1205
- {
1206
- "epoch": 28.0,
1207
- "learning_rate": 4.785742891469764e-06,
1208
- "loss": 0.5724,
1209
- "step": 1580
1210
- },
1211
- {
1212
- "epoch": 28.01,
1213
- "learning_rate": 4.745694833800561e-06,
1214
- "loss": 0.666,
1215
- "step": 1590
1216
- },
1217
- {
1218
- "epoch": 28.01,
1219
- "learning_rate": 4.705646776131358e-06,
1220
- "loss": 0.7482,
1221
- "step": 1600
1222
- },
1223
- {
1224
- "epoch": 28.02,
1225
- "learning_rate": 4.665598718462155e-06,
1226
- "loss": 0.7556,
1227
- "step": 1610
1228
- },
1229
- {
1230
- "epoch": 28.02,
1231
- "learning_rate": 4.625550660792952e-06,
1232
- "loss": 0.6717,
1233
- "step": 1620
1234
- },
1235
- {
1236
- "epoch": 28.02,
1237
- "eval_accuracy": 0.5345622119815668,
1238
- "eval_loss": 1.0872467756271362,
1239
- "eval_runtime": 163.65,
1240
- "eval_samples_per_second": 1.326,
1241
- "eval_steps_per_second": 0.171,
1242
- "step": 1624
1243
- },
1244
- {
1245
- "epoch": 29.0,
1246
- "learning_rate": 4.585502603123749e-06,
1247
- "loss": 0.7058,
1248
- "step": 1630
1249
- },
1250
- {
1251
- "epoch": 29.01,
1252
- "learning_rate": 4.5454545454545455e-06,
1253
- "loss": 0.6564,
1254
- "step": 1640
1255
- },
1256
- {
1257
- "epoch": 29.01,
1258
- "learning_rate": 4.505406487785342e-06,
1259
- "loss": 0.5864,
1260
- "step": 1650
1261
- },
1262
- {
1263
- "epoch": 29.01,
1264
- "learning_rate": 4.46535843011614e-06,
1265
- "loss": 0.751,
1266
- "step": 1660
1267
- },
1268
- {
1269
- "epoch": 29.02,
1270
- "learning_rate": 4.425310372446937e-06,
1271
- "loss": 0.6114,
1272
- "step": 1670
1273
- },
1274
- {
1275
- "epoch": 29.02,
1276
- "learning_rate": 4.3852623147777334e-06,
1277
- "loss": 0.8697,
1278
- "step": 1680
1279
- },
1280
- {
1281
- "epoch": 29.02,
1282
- "eval_accuracy": 0.663594470046083,
1283
- "eval_loss": 0.8410985469818115,
1284
- "eval_runtime": 163.2869,
1285
- "eval_samples_per_second": 1.329,
1286
- "eval_steps_per_second": 0.171,
1287
- "step": 1680
1288
- },
1289
- {
1290
- "epoch": 30.0,
1291
- "learning_rate": 4.34521425710853e-06,
1292
- "loss": 0.5641,
1293
- "step": 1690
1294
- },
1295
- {
1296
- "epoch": 30.01,
1297
- "learning_rate": 4.305166199439327e-06,
1298
- "loss": 0.6042,
1299
- "step": 1700
1300
- },
1301
- {
1302
- "epoch": 30.01,
1303
- "learning_rate": 4.2651181417701246e-06,
1304
- "loss": 0.6321,
1305
- "step": 1710
1306
- },
1307
- {
1308
- "epoch": 30.01,
1309
- "learning_rate": 4.225070084100921e-06,
1310
- "loss": 0.5942,
1311
- "step": 1720
1312
- },
1313
- {
1314
- "epoch": 30.02,
1315
- "learning_rate": 4.185022026431718e-06,
1316
- "loss": 0.4557,
1317
- "step": 1730
1318
- },
1319
- {
1320
- "epoch": 30.02,
1321
- "eval_accuracy": 0.5852534562211982,
1322
- "eval_loss": 1.0334526300430298,
1323
- "eval_runtime": 164.4022,
1324
- "eval_samples_per_second": 1.32,
1325
- "eval_steps_per_second": 0.17,
1326
- "step": 1736
1327
- },
1328
- {
1329
- "epoch": 31.0,
1330
- "learning_rate": 4.144973968762515e-06,
1331
- "loss": 0.5802,
1332
- "step": 1740
1333
- },
1334
- {
1335
- "epoch": 31.01,
1336
- "learning_rate": 4.1049259110933125e-06,
1337
- "loss": 0.8083,
1338
- "step": 1750
1339
- },
1340
- {
1341
- "epoch": 31.01,
1342
- "learning_rate": 4.064877853424109e-06,
1343
- "loss": 0.5083,
1344
- "step": 1760
1345
- },
1346
- {
1347
- "epoch": 31.01,
1348
- "learning_rate": 4.024829795754906e-06,
1349
- "loss": 0.782,
1350
- "step": 1770
1351
- },
1352
- {
1353
- "epoch": 31.02,
1354
- "learning_rate": 3.984781738085703e-06,
1355
- "loss": 0.6633,
1356
- "step": 1780
1357
- },
1358
- {
1359
- "epoch": 31.02,
1360
- "learning_rate": 3.9447336804165e-06,
1361
- "loss": 0.4873,
1362
- "step": 1790
1363
- },
1364
- {
1365
- "epoch": 31.02,
1366
- "eval_accuracy": 0.5299539170506913,
1367
- "eval_loss": 1.154846429824829,
1368
- "eval_runtime": 164.5771,
1369
- "eval_samples_per_second": 1.319,
1370
- "eval_steps_per_second": 0.17,
1371
- "step": 1792
1372
- },
1373
- {
1374
- "epoch": 32.0,
1375
- "learning_rate": 3.904685622747297e-06,
1376
- "loss": 0.569,
1377
- "step": 1800
1378
- },
1379
- {
1380
- "epoch": 32.01,
1381
- "learning_rate": 3.864637565078094e-06,
1382
- "loss": 0.5574,
1383
- "step": 1810
1384
- },
1385
- {
1386
- "epoch": 32.01,
1387
- "learning_rate": 3.8245895074088915e-06,
1388
- "loss": 0.5999,
1389
- "step": 1820
1390
- },
1391
- {
1392
- "epoch": 32.01,
1393
- "learning_rate": 3.7845414497396883e-06,
1394
- "loss": 0.6882,
1395
- "step": 1830
1396
- },
1397
- {
1398
- "epoch": 32.02,
1399
- "learning_rate": 3.744493392070485e-06,
1400
- "loss": 0.7812,
1401
- "step": 1840
1402
- },
1403
- {
1404
- "epoch": 32.02,
1405
- "eval_accuracy": 0.5990783410138248,
1406
- "eval_loss": 1.036067247390747,
1407
- "eval_runtime": 165.0007,
1408
- "eval_samples_per_second": 1.315,
1409
- "eval_steps_per_second": 0.17,
1410
- "step": 1848
1411
- },
1412
- {
1413
- "epoch": 33.0,
1414
- "learning_rate": 3.7044453344012823e-06,
1415
- "loss": 0.6552,
1416
- "step": 1850
1417
- },
1418
- {
1419
- "epoch": 33.0,
1420
- "learning_rate": 3.664397276732079e-06,
1421
- "loss": 0.724,
1422
- "step": 1860
1423
- },
1424
- {
1425
- "epoch": 33.01,
1426
- "learning_rate": 3.6243492190628758e-06,
1427
- "loss": 0.6444,
1428
- "step": 1870
1429
- },
1430
- {
1431
- "epoch": 33.01,
1432
- "learning_rate": 3.584301161393673e-06,
1433
- "loss": 0.5467,
1434
- "step": 1880
1435
- },
1436
- {
1437
- "epoch": 33.02,
1438
- "learning_rate": 3.5442531037244697e-06,
1439
- "loss": 0.4304,
1440
- "step": 1890
1441
- },
1442
- {
1443
- "epoch": 33.02,
1444
- "learning_rate": 3.504205046055267e-06,
1445
- "loss": 0.5975,
1446
- "step": 1900
1447
- },
1448
- {
1449
- "epoch": 33.02,
1450
- "eval_accuracy": 0.5253456221198156,
1451
- "eval_loss": 1.2160247564315796,
1452
- "eval_runtime": 164.1864,
1453
- "eval_samples_per_second": 1.322,
1454
- "eval_steps_per_second": 0.171,
1455
- "step": 1904
1456
- },
1457
- {
1458
- "epoch": 34.0,
1459
- "learning_rate": 3.4641569883860637e-06,
1460
- "loss": 0.5616,
1461
- "step": 1910
1462
- },
1463
- {
1464
- "epoch": 34.01,
1465
- "learning_rate": 3.424108930716861e-06,
1466
- "loss": 0.5305,
1467
- "step": 1920
1468
- },
1469
- {
1470
- "epoch": 34.01,
1471
- "learning_rate": 3.3840608730476576e-06,
1472
- "loss": 0.5134,
1473
- "step": 1930
1474
- },
1475
- {
1476
- "epoch": 34.01,
1477
- "learning_rate": 3.3440128153784544e-06,
1478
- "loss": 0.5199,
1479
- "step": 1940
1480
- },
1481
- {
1482
- "epoch": 34.02,
1483
- "learning_rate": 3.3039647577092516e-06,
1484
- "loss": 0.5365,
1485
- "step": 1950
1486
- },
1487
- {
1488
- "epoch": 34.02,
1489
- "learning_rate": 3.2639167000400484e-06,
1490
- "loss": 0.6439,
1491
- "step": 1960
1492
- },
1493
- {
1494
- "epoch": 34.02,
1495
- "eval_accuracy": 0.576036866359447,
1496
- "eval_loss": 1.0630296468734741,
1497
- "eval_runtime": 164.642,
1498
- "eval_samples_per_second": 1.318,
1499
- "eval_steps_per_second": 0.17,
1500
- "step": 1960
1501
- },
1502
- {
1503
- "epoch": 35.0,
1504
- "learning_rate": 3.2238686423708456e-06,
1505
- "loss": 0.4724,
1506
- "step": 1970
1507
- },
1508
- {
1509
- "epoch": 35.01,
1510
- "learning_rate": 3.1838205847016423e-06,
1511
- "loss": 0.5804,
1512
- "step": 1980
1513
- },
1514
- {
1515
- "epoch": 35.01,
1516
- "learning_rate": 3.143772527032439e-06,
1517
- "loss": 0.607,
1518
- "step": 1990
1519
- },
1520
- {
1521
- "epoch": 35.01,
1522
- "learning_rate": 3.1037244693632363e-06,
1523
- "loss": 0.64,
1524
- "step": 2000
1525
- },
1526
- {
1527
- "epoch": 35.02,
1528
- "learning_rate": 3.063676411694033e-06,
1529
- "loss": 0.597,
1530
- "step": 2010
1531
- },
1532
- {
1533
- "epoch": 35.02,
1534
- "eval_accuracy": 0.6082949308755761,
1535
- "eval_loss": 1.0567601919174194,
1536
- "eval_runtime": 163.9076,
1537
- "eval_samples_per_second": 1.324,
1538
- "eval_steps_per_second": 0.171,
1539
- "step": 2016
1540
- },
1541
- {
1542
- "epoch": 36.0,
1543
- "learning_rate": 3.0236283540248302e-06,
1544
- "loss": 0.5915,
1545
- "step": 2020
1546
- },
1547
- {
1548
- "epoch": 36.01,
1549
- "learning_rate": 2.983580296355627e-06,
1550
- "loss": 0.6774,
1551
- "step": 2030
1552
- },
1553
- {
1554
- "epoch": 36.01,
1555
- "learning_rate": 2.943532238686424e-06,
1556
- "loss": 0.4855,
1557
- "step": 2040
1558
- },
1559
- {
1560
- "epoch": 36.01,
1561
- "learning_rate": 2.903484181017221e-06,
1562
- "loss": 0.6448,
1563
- "step": 2050
1564
- },
1565
- {
1566
- "epoch": 36.02,
1567
- "learning_rate": 2.8634361233480177e-06,
1568
- "loss": 0.6921,
1569
- "step": 2060
1570
- },
1571
- {
1572
- "epoch": 36.02,
1573
- "learning_rate": 2.823388065678815e-06,
1574
- "loss": 0.5641,
1575
- "step": 2070
1576
- },
1577
- {
1578
- "epoch": 36.02,
1579
- "eval_accuracy": 0.6682027649769585,
1580
- "eval_loss": 0.950232982635498,
1581
- "eval_runtime": 163.9254,
1582
- "eval_samples_per_second": 1.324,
1583
- "eval_steps_per_second": 0.171,
1584
- "step": 2072
1585
- },
1586
- {
1587
- "epoch": 37.0,
1588
- "learning_rate": 2.7833400080096117e-06,
1589
- "loss": 0.4284,
1590
- "step": 2080
1591
- },
1592
- {
1593
- "epoch": 37.01,
1594
- "learning_rate": 2.743291950340409e-06,
1595
- "loss": 0.4562,
1596
- "step": 2090
1597
- },
1598
- {
1599
- "epoch": 37.01,
1600
- "learning_rate": 2.7032438926712056e-06,
1601
- "loss": 0.3981,
1602
- "step": 2100
1603
- },
1604
- {
1605
- "epoch": 37.01,
1606
- "learning_rate": 2.663195835002003e-06,
1607
- "loss": 0.675,
1608
- "step": 2110
1609
- },
1610
- {
1611
- "epoch": 37.02,
1612
- "learning_rate": 2.6231477773327996e-06,
1613
- "loss": 0.4992,
1614
- "step": 2120
1615
- },
1616
- {
1617
- "epoch": 37.02,
1618
- "eval_accuracy": 0.5898617511520737,
1619
- "eval_loss": 1.0643178224563599,
1620
- "eval_runtime": 163.7021,
1621
- "eval_samples_per_second": 1.326,
1622
- "eval_steps_per_second": 0.171,
1623
- "step": 2128
1624
- },
1625
- {
1626
- "epoch": 38.0,
1627
- "learning_rate": 2.5830997196635963e-06,
1628
- "loss": 0.5807,
1629
- "step": 2130
1630
- },
1631
- {
1632
- "epoch": 38.0,
1633
- "learning_rate": 2.5430516619943935e-06,
1634
- "loss": 0.5385,
1635
- "step": 2140
1636
- },
1637
- {
1638
- "epoch": 38.01,
1639
- "learning_rate": 2.5030036043251903e-06,
1640
- "loss": 0.5467,
1641
- "step": 2150
1642
- },
1643
- {
1644
- "epoch": 38.01,
1645
- "learning_rate": 2.4629555466559875e-06,
1646
- "loss": 0.5206,
1647
- "step": 2160
1648
- },
1649
- {
1650
- "epoch": 38.02,
1651
- "learning_rate": 2.4229074889867843e-06,
1652
- "loss": 0.648,
1653
- "step": 2170
1654
- },
1655
- {
1656
- "epoch": 38.02,
1657
- "learning_rate": 2.3828594313175814e-06,
1658
- "loss": 0.4136,
1659
- "step": 2180
1660
- },
1661
- {
1662
- "epoch": 38.02,
1663
- "eval_accuracy": 0.6082949308755761,
1664
- "eval_loss": 0.9271302223205566,
1665
- "eval_runtime": 163.5799,
1666
- "eval_samples_per_second": 1.327,
1667
- "eval_steps_per_second": 0.171,
1668
- "step": 2184
1669
- },
1670
- {
1671
- "epoch": 39.0,
1672
- "learning_rate": 2.342811373648378e-06,
1673
- "loss": 0.6367,
1674
- "step": 2190
1675
- },
1676
- {
1677
- "epoch": 39.01,
1678
- "learning_rate": 2.302763315979175e-06,
1679
- "loss": 0.4054,
1680
- "step": 2200
1681
- },
1682
- {
1683
- "epoch": 39.01,
1684
- "learning_rate": 2.262715258309972e-06,
1685
- "loss": 0.6031,
1686
- "step": 2210
1687
- },
1688
- {
1689
- "epoch": 39.01,
1690
- "learning_rate": 2.222667200640769e-06,
1691
- "loss": 0.6353,
1692
- "step": 2220
1693
- },
1694
- {
1695
- "epoch": 39.02,
1696
- "learning_rate": 2.182619142971566e-06,
1697
- "loss": 0.7539,
1698
- "step": 2230
1699
- },
1700
- {
1701
- "epoch": 39.02,
1702
- "learning_rate": 2.142571085302363e-06,
1703
- "loss": 0.5194,
1704
- "step": 2240
1705
- },
1706
- {
1707
- "epoch": 39.02,
1708
- "eval_accuracy": 0.576036866359447,
1709
- "eval_loss": 1.0672531127929688,
1710
- "eval_runtime": 166.7593,
1711
- "eval_samples_per_second": 1.301,
1712
- "eval_steps_per_second": 0.168,
1713
- "step": 2240
1714
- },
1715
- {
1716
- "epoch": 40.0,
1717
- "learning_rate": 2.1025230276331596e-06,
1718
- "loss": 0.5378,
1719
- "step": 2250
1720
- },
1721
- {
1722
- "epoch": 40.01,
1723
- "learning_rate": 2.062474969963957e-06,
1724
- "loss": 0.548,
1725
- "step": 2260
1726
- },
1727
- {
1728
- "epoch": 40.01,
1729
- "learning_rate": 2.0224269122947536e-06,
1730
- "loss": 0.4956,
1731
- "step": 2270
1732
- },
1733
- {
1734
- "epoch": 40.01,
1735
- "learning_rate": 1.982378854625551e-06,
1736
- "loss": 0.5747,
1737
- "step": 2280
1738
- },
1739
- {
1740
- "epoch": 40.02,
1741
- "learning_rate": 1.942330796956348e-06,
1742
- "loss": 0.7653,
1743
- "step": 2290
1744
- },
1745
- {
1746
- "epoch": 40.02,
1747
- "eval_accuracy": 0.6267281105990783,
1748
- "eval_loss": 0.9899203181266785,
1749
- "eval_runtime": 163.3473,
1750
- "eval_samples_per_second": 1.328,
1751
- "eval_steps_per_second": 0.171,
1752
- "step": 2296
1753
- },
1754
- {
1755
- "epoch": 41.0,
1756
- "learning_rate": 1.902282739287145e-06,
1757
- "loss": 0.6948,
1758
- "step": 2300
1759
- },
1760
- {
1761
- "epoch": 41.01,
1762
- "learning_rate": 1.8622346816179417e-06,
1763
- "loss": 0.4703,
1764
- "step": 2310
1765
- },
1766
- {
1767
- "epoch": 41.01,
1768
- "learning_rate": 1.8221866239487387e-06,
1769
- "loss": 0.5782,
1770
- "step": 2320
1771
- },
1772
- {
1773
- "epoch": 41.01,
1774
- "learning_rate": 1.7821385662795357e-06,
1775
- "loss": 0.4866,
1776
- "step": 2330
1777
- },
1778
- {
1779
- "epoch": 41.02,
1780
- "learning_rate": 1.7420905086103327e-06,
1781
- "loss": 0.5591,
1782
- "step": 2340
1783
- },
1784
- {
1785
- "epoch": 41.02,
1786
- "learning_rate": 1.7020424509411296e-06,
1787
- "loss": 0.6247,
1788
- "step": 2350
1789
- },
1790
- {
1791
- "epoch": 41.02,
1792
- "eval_accuracy": 0.631336405529954,
1793
- "eval_loss": 1.0216394662857056,
1794
- "eval_runtime": 164.836,
1795
- "eval_samples_per_second": 1.316,
1796
- "eval_steps_per_second": 0.17,
1797
- "step": 2352
1798
- },
1799
- {
1800
- "epoch": 42.0,
1801
- "learning_rate": 1.6619943932719266e-06,
1802
- "loss": 0.4712,
1803
- "step": 2360
1804
- },
1805
- {
1806
- "epoch": 42.01,
1807
- "learning_rate": 1.6219463356027234e-06,
1808
- "loss": 0.4287,
1809
- "step": 2370
1810
- },
1811
- {
1812
- "epoch": 42.01,
1813
- "learning_rate": 1.5818982779335204e-06,
1814
- "loss": 0.5673,
1815
- "step": 2380
1816
- },
1817
- {
1818
- "epoch": 42.01,
1819
- "learning_rate": 1.5418502202643173e-06,
1820
- "loss": 0.4914,
1821
- "step": 2390
1822
- },
1823
- {
1824
- "epoch": 42.02,
1825
- "learning_rate": 1.5018021625951143e-06,
1826
- "loss": 0.6901,
1827
- "step": 2400
1828
- },
1829
- {
1830
- "epoch": 42.02,
1831
- "eval_accuracy": 0.5576036866359447,
1832
- "eval_loss": 1.1336352825164795,
1833
- "eval_runtime": 163.2895,
1834
- "eval_samples_per_second": 1.329,
1835
- "eval_steps_per_second": 0.171,
1836
- "step": 2408
1837
- },
1838
- {
1839
- "epoch": 43.0,
1840
- "learning_rate": 1.4617541049259113e-06,
1841
- "loss": 0.4876,
1842
- "step": 2410
1843
- },
1844
- {
1845
- "epoch": 43.0,
1846
- "learning_rate": 1.4217060472567083e-06,
1847
- "loss": 0.4494,
1848
- "step": 2420
1849
- },
1850
- {
1851
- "epoch": 43.01,
1852
- "learning_rate": 1.3816579895875052e-06,
1853
- "loss": 0.5611,
1854
- "step": 2430
1855
- },
1856
- {
1857
- "epoch": 43.01,
1858
- "learning_rate": 1.341609931918302e-06,
1859
- "loss": 0.7779,
1860
- "step": 2440
1861
- },
1862
- {
1863
- "epoch": 43.02,
1864
- "learning_rate": 1.301561874249099e-06,
1865
- "loss": 0.4958,
1866
- "step": 2450
1867
- },
1868
- {
1869
- "epoch": 43.02,
1870
- "learning_rate": 1.261513816579896e-06,
1871
- "loss": 0.4731,
1872
- "step": 2460
1873
- },
1874
- {
1875
- "epoch": 43.02,
1876
- "eval_accuracy": 0.5944700460829493,
1877
- "eval_loss": 1.1247893571853638,
1878
- "eval_runtime": 169.0052,
1879
- "eval_samples_per_second": 1.284,
1880
- "eval_steps_per_second": 0.166,
1881
- "step": 2464
1882
- },
1883
- {
1884
- "epoch": 44.0,
1885
- "learning_rate": 1.221465758910693e-06,
1886
- "loss": 0.3636,
1887
- "step": 2470
1888
- },
1889
- {
1890
- "epoch": 44.01,
1891
- "learning_rate": 1.18141770124149e-06,
1892
- "loss": 0.4359,
1893
- "step": 2480
1894
- },
1895
- {
1896
- "epoch": 44.01,
1897
- "learning_rate": 1.1413696435722869e-06,
1898
- "loss": 0.5232,
1899
- "step": 2490
1900
- },
1901
- {
1902
- "epoch": 44.01,
1903
- "learning_rate": 1.1013215859030837e-06,
1904
- "loss": 0.3694,
1905
- "step": 2500
1906
- },
1907
- {
1908
- "epoch": 44.02,
1909
- "learning_rate": 1.0612735282338806e-06,
1910
- "loss": 0.6064,
1911
- "step": 2510
1912
- },
1913
- {
1914
- "epoch": 44.02,
1915
- "learning_rate": 1.0212254705646776e-06,
1916
- "loss": 0.4623,
1917
- "step": 2520
1918
- },
1919
- {
1920
- "epoch": 44.02,
1921
- "eval_accuracy": 0.6359447004608295,
1922
- "eval_loss": 1.010471224784851,
1923
- "eval_runtime": 162.4614,
1924
- "eval_samples_per_second": 1.336,
1925
- "eval_steps_per_second": 0.172,
1926
- "step": 2520
1927
- },
1928
- {
1929
- "epoch": 45.0,
1930
- "learning_rate": 9.811774128954748e-07,
1931
- "loss": 0.4463,
1932
- "step": 2530
1933
- },
1934
- {
1935
- "epoch": 45.01,
1936
- "learning_rate": 9.411293552262717e-07,
1937
- "loss": 0.5154,
1938
- "step": 2540
1939
- },
1940
- {
1941
- "epoch": 45.01,
1942
- "learning_rate": 9.010812975570686e-07,
1943
- "loss": 0.5859,
1944
- "step": 2550
1945
- },
1946
- {
1947
- "epoch": 45.01,
1948
- "learning_rate": 8.610332398878655e-07,
1949
- "loss": 0.6416,
1950
- "step": 2560
1951
- },
1952
- {
1953
- "epoch": 45.02,
1954
- "learning_rate": 8.209851822186625e-07,
1955
- "loss": 0.5236,
1956
- "step": 2570
1957
- },
1958
- {
1959
- "epoch": 45.02,
1960
- "eval_accuracy": 0.5622119815668203,
1961
- "eval_loss": 1.2222412824630737,
1962
- "eval_runtime": 166.434,
1963
- "eval_samples_per_second": 1.304,
1964
- "eval_steps_per_second": 0.168,
1965
- "step": 2576
1966
- },
1967
- {
1968
- "epoch": 46.0,
1969
- "learning_rate": 7.809371245494595e-07,
1970
- "loss": 0.4967,
1971
- "step": 2580
1972
- },
1973
- {
1974
- "epoch": 46.01,
1975
- "learning_rate": 7.408890668802563e-07,
1976
- "loss": 0.4661,
1977
- "step": 2590
1978
- },
1979
- {
1980
- "epoch": 46.01,
1981
- "learning_rate": 7.008410092110533e-07,
1982
- "loss": 0.6802,
1983
- "step": 2600
1984
- },
1985
- {
1986
- "epoch": 46.01,
1987
- "learning_rate": 6.607929515418503e-07,
1988
- "loss": 0.3791,
1989
- "step": 2610
1990
- },
1991
- {
1992
- "epoch": 46.02,
1993
- "learning_rate": 6.207448938726472e-07,
1994
- "loss": 0.688,
1995
- "step": 2620
1996
- },
1997
- {
1998
- "epoch": 46.02,
1999
- "learning_rate": 5.806968362034441e-07,
2000
- "loss": 0.4865,
2001
- "step": 2630
2002
- },
2003
- {
2004
- "epoch": 46.02,
2005
- "eval_accuracy": 0.5668202764976958,
2006
- "eval_loss": 1.1389591693878174,
2007
- "eval_runtime": 164.5648,
2008
- "eval_samples_per_second": 1.319,
2009
- "eval_steps_per_second": 0.17,
2010
- "step": 2632
2011
- },
2012
- {
2013
- "epoch": 47.0,
2014
- "learning_rate": 5.406487785342411e-07,
2015
- "loss": 0.5445,
2016
- "step": 2640
2017
- },
2018
- {
2019
- "epoch": 47.01,
2020
- "learning_rate": 5.006007208650381e-07,
2021
- "loss": 0.445,
2022
- "step": 2650
2023
- },
2024
- {
2025
- "epoch": 47.01,
2026
- "learning_rate": 4.605526631958351e-07,
2027
- "loss": 0.4754,
2028
- "step": 2660
2029
- },
2030
- {
2031
- "epoch": 47.01,
2032
- "learning_rate": 4.20504605526632e-07,
2033
- "loss": 0.5643,
2034
- "step": 2670
2035
- },
2036
- {
2037
- "epoch": 47.02,
2038
- "learning_rate": 3.80456547857429e-07,
2039
- "loss": 0.6178,
2040
- "step": 2680
2041
- },
2042
- {
2043
- "epoch": 47.02,
2044
- "eval_accuracy": 0.5806451612903226,
2045
- "eval_loss": 1.118094801902771,
2046
- "eval_runtime": 163.1432,
2047
- "eval_samples_per_second": 1.33,
2048
- "eval_steps_per_second": 0.172,
2049
- "step": 2688
2050
- },
2051
- {
2052
- "epoch": 48.0,
2053
- "learning_rate": 3.404084901882259e-07,
2054
- "loss": 0.4701,
2055
- "step": 2690
2056
- },
2057
- {
2058
- "epoch": 48.0,
2059
- "learning_rate": 3.0036043251902283e-07,
2060
- "loss": 0.3917,
2061
- "step": 2700
2062
- },
2063
- {
2064
- "epoch": 48.01,
2065
- "learning_rate": 2.603123748498198e-07,
2066
- "loss": 0.5181,
2067
- "step": 2710
2068
- },
2069
- {
2070
- "epoch": 48.01,
2071
- "learning_rate": 2.2026431718061676e-07,
2072
- "loss": 0.5627,
2073
- "step": 2720
2074
- },
2075
- {
2076
- "epoch": 48.02,
2077
- "learning_rate": 1.802162595114137e-07,
2078
- "loss": 0.449,
2079
- "step": 2730
2080
- },
2081
- {
2082
- "epoch": 48.02,
2083
- "learning_rate": 1.4016820184221066e-07,
2084
- "loss": 0.6068,
2085
- "step": 2740
2086
- },
2087
- {
2088
- "epoch": 48.02,
2089
- "eval_accuracy": 0.6036866359447005,
2090
- "eval_loss": 1.0918972492218018,
2091
- "eval_runtime": 166.7399,
2092
- "eval_samples_per_second": 1.301,
2093
- "eval_steps_per_second": 0.168,
2094
- "step": 2744
2095
- },
2096
- {
2097
- "epoch": 49.0,
2098
- "learning_rate": 1.0012014417300762e-07,
2099
- "loss": 0.4975,
2100
- "step": 2750
2101
- },
2102
- {
2103
- "epoch": 49.01,
2104
- "learning_rate": 6.007208650380457e-08,
2105
- "loss": 0.4256,
2106
- "step": 2760
2107
- },
2108
- {
2109
- "epoch": 49.01,
2110
- "learning_rate": 2.0024028834601525e-08,
2111
- "loss": 0.4835,
2112
- "step": 2770
2113
- },
2114
- {
2115
- "epoch": 49.01,
2116
- "eval_accuracy": 0.5852534562211982,
2117
- "eval_loss": 1.0932610034942627,
2118
- "eval_runtime": 164.2231,
2119
- "eval_samples_per_second": 1.321,
2120
- "eval_steps_per_second": 0.17,
2121
- "step": 2775
2122
- },
2123
- {
2124
- "epoch": 49.01,
2125
- "step": 2775,
2126
- "total_flos": 2.7602315185605673e+19,
2127
- "train_loss": 0.8102743840432381,
2128
- "train_runtime": 28793.9081,
2129
- "train_samples_per_second": 0.771,
2130
- "train_steps_per_second": 0.096
2131
- },
2132
- {
2133
- "epoch": 49.01,
2134
- "eval_accuracy": 0.6805555555555556,
2135
- "eval_loss": 0.7703173160552979,
2136
- "eval_runtime": 171.6337,
2137
- "eval_samples_per_second": 1.258,
2138
- "eval_steps_per_second": 0.157,
2139
- "step": 2775
2140
- },
2141
- {
2142
- "epoch": 49.01,
2143
- "eval_accuracy": 0.6805555555555556,
2144
- "eval_loss": 0.7703171968460083,
2145
- "eval_runtime": 162.7205,
2146
- "eval_samples_per_second": 1.327,
2147
- "eval_steps_per_second": 0.166,
2148
- "step": 2775
2149
  }
2150
  ],
2151
  "logging_steps": 10,
2152
- "max_steps": 2775,
2153
  "num_input_tokens_seen": 0,
2154
  "num_train_epochs": 9223372036854775807,
2155
  "save_steps": 500,
2156
- "total_flos": 2.7602315185605673e+19,
2157
  "train_batch_size": 8,
2158
  "trial_name": null,
2159
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7419354838709677,
3
  "best_model_checkpoint": "videomae-base-finetuned-subset-check10/checkpoint-896",
4
+ "epoch": 19.041441441441442,
5
  "eval_steps": 500,
6
+ "global_step": 1110,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.01,
13
+ "learning_rate": 9.00900900900901e-07,
14
+ "loss": 1.6695,
15
  "step": 10
16
  },
17
  {
18
+ "epoch": 0.02,
19
+ "learning_rate": 1.801801801801802e-06,
20
+ "loss": 1.6945,
21
  "step": 20
22
  },
23
  {
24
+ "epoch": 0.03,
25
+ "learning_rate": 2.702702702702703e-06,
26
+ "loss": 1.6484,
27
  "step": 30
28
  },
29
  {
30
+ "epoch": 0.04,
31
+ "learning_rate": 3.603603603603604e-06,
32
+ "loss": 1.6661,
33
  "step": 40
34
  },
35
  {
36
+ "epoch": 0.05,
37
+ "learning_rate": 4.504504504504505e-06,
38
+ "loss": 1.6348,
39
  "step": 50
40
  },
41
  {
42
+ "epoch": 0.05,
43
+ "eval_accuracy": 0.511520737327189,
44
+ "eval_loss": 1.544296383857727,
45
+ "eval_runtime": 169.659,
46
+ "eval_samples_per_second": 1.279,
47
  "eval_steps_per_second": 0.165,
48
  "step": 56
49
  },
50
  {
51
  "epoch": 1.0,
52
+ "learning_rate": 5.405405405405406e-06,
53
+ "loss": 1.5964,
54
  "step": 60
55
  },
56
  {
57
  "epoch": 1.01,
58
+ "learning_rate": 6.3063063063063065e-06,
59
+ "loss": 1.6218,
60
  "step": 70
61
  },
62
  {
63
+ "epoch": 1.02,
64
+ "learning_rate": 7.207207207207208e-06,
65
+ "loss": 1.6405,
66
  "step": 80
67
  },
68
  {
69
+ "epoch": 1.03,
70
+ "learning_rate": 8.108108108108109e-06,
71
+ "loss": 1.6203,
72
  "step": 90
73
  },
74
  {
75
+ "epoch": 1.04,
76
+ "learning_rate": 9.00900900900901e-06,
77
+ "loss": 1.6147,
78
  "step": 100
79
  },
80
  {
81
+ "epoch": 1.05,
82
+ "learning_rate": 9.90990990990991e-06,
83
+ "loss": 1.6015,
84
  "step": 110
85
  },
86
  {
87
+ "epoch": 1.05,
88
+ "eval_accuracy": 0.4792626728110599,
89
+ "eval_loss": 1.529832124710083,
90
+ "eval_runtime": 163.2712,
91
+ "eval_samples_per_second": 1.329,
92
+ "eval_steps_per_second": 0.171,
93
  "step": 112
94
  },
95
  {
96
+ "epoch": 2.01,
97
+ "learning_rate": 9.90990990990991e-06,
98
+ "loss": 1.5899,
99
  "step": 120
100
  },
101
  {
102
+ "epoch": 2.02,
103
+ "learning_rate": 9.80980980980981e-06,
104
+ "loss": 1.6095,
105
  "step": 130
106
  },
107
  {
108
+ "epoch": 2.03,
109
+ "learning_rate": 9.70970970970971e-06,
110
+ "loss": 1.613,
111
  "step": 140
112
  },
113
  {
114
+ "epoch": 2.03,
115
+ "learning_rate": 9.60960960960961e-06,
116
+ "loss": 1.5829,
117
  "step": 150
118
  },
119
  {
120
+ "epoch": 2.04,
121
+ "learning_rate": 9.50950950950951e-06,
122
+ "loss": 1.5683,
123
  "step": 160
124
  },
125
  {
126
+ "epoch": 2.05,
127
+ "eval_accuracy": 0.3271889400921659,
128
+ "eval_loss": 1.5288971662521362,
129
+ "eval_runtime": 162.8221,
130
+ "eval_samples_per_second": 1.333,
131
+ "eval_steps_per_second": 0.172,
132
  "step": 168
133
  },
134
  {
135
  "epoch": 3.0,
136
+ "learning_rate": 9.40940940940941e-06,
137
+ "loss": 1.5958,
138
  "step": 170
139
  },
140
  {
141
+ "epoch": 3.01,
142
+ "learning_rate": 9.30930930930931e-06,
143
+ "loss": 1.5487,
144
  "step": 180
145
  },
146
  {
147
+ "epoch": 3.02,
148
+ "learning_rate": 9.20920920920921e-06,
149
+ "loss": 1.5407,
150
  "step": 190
151
  },
152
  {
153
+ "epoch": 3.03,
154
+ "learning_rate": 9.10910910910911e-06,
155
+ "loss": 1.5352,
156
  "step": 200
157
  },
158
  {
159
+ "epoch": 3.04,
160
+ "learning_rate": 9.00900900900901e-06,
161
+ "loss": 1.4537,
162
  "step": 210
163
  },
164
  {
165
+ "epoch": 3.05,
166
+ "learning_rate": 8.90890890890891e-06,
167
+ "loss": 1.512,
168
  "step": 220
169
  },
170
  {
171
+ "epoch": 3.05,
172
+ "eval_accuracy": 0.631336405529954,
173
+ "eval_loss": 1.302922248840332,
174
+ "eval_runtime": 162.9796,
175
+ "eval_samples_per_second": 1.331,
176
+ "eval_steps_per_second": 0.172,
177
  "step": 224
178
  },
179
  {
180
+ "epoch": 4.01,
181
+ "learning_rate": 8.80880880880881e-06,
182
+ "loss": 1.4831,
183
  "step": 230
184
  },
185
  {
186
  "epoch": 4.01,
187
+ "learning_rate": 8.70870870870871e-06,
188
+ "loss": 1.3654,
189
  "step": 240
190
  },
191
  {
192
+ "epoch": 4.02,
193
+ "learning_rate": 8.60860860860861e-06,
194
+ "loss": 1.3439,
195
  "step": 250
196
  },
197
  {
198
+ "epoch": 4.03,
199
+ "learning_rate": 8.50850850850851e-06,
200
+ "loss": 1.2985,
201
  "step": 260
202
  },
203
  {
204
+ "epoch": 4.04,
205
+ "learning_rate": 8.408408408408409e-06,
206
+ "loss": 1.2316,
207
  "step": 270
208
  },
209
  {
210
+ "epoch": 4.05,
211
+ "learning_rate": 8.308308308308309e-06,
212
+ "loss": 1.2309,
213
  "step": 280
214
  },
215
  {
216
+ "epoch": 4.05,
217
+ "eval_accuracy": 0.5207373271889401,
218
+ "eval_loss": 1.21807861328125,
219
+ "eval_runtime": 162.5548,
220
+ "eval_samples_per_second": 1.335,
221
+ "eval_steps_per_second": 0.172,
222
  "step": 280
223
  },
224
  {
225
+ "epoch": 5.01,
226
+ "learning_rate": 8.208208208208209e-06,
227
+ "loss": 1.3147,
228
  "step": 290
229
  },
230
  {
231
+ "epoch": 5.02,
232
+ "learning_rate": 8.108108108108109e-06,
233
+ "loss": 1.2977,
234
  "step": 300
235
  },
236
  {
237
+ "epoch": 5.03,
238
+ "learning_rate": 8.00800800800801e-06,
239
+ "loss": 1.2603,
240
  "step": 310
241
  },
242
  {
243
+ "epoch": 5.04,
244
+ "learning_rate": 7.90790790790791e-06,
245
+ "loss": 1.1538,
246
  "step": 320
247
  },
248
  {
249
+ "epoch": 5.05,
250
+ "learning_rate": 7.807807807807808e-06,
251
+ "loss": 1.1949,
252
  "step": 330
253
  },
254
  {
255
+ "epoch": 5.05,
256
+ "eval_accuracy": 0.6589861751152074,
257
+ "eval_loss": 1.0441014766693115,
258
+ "eval_runtime": 161.7405,
259
+ "eval_samples_per_second": 1.342,
260
+ "eval_steps_per_second": 0.173,
261
  "step": 336
262
  },
263
  {
264
  "epoch": 6.0,
265
+ "learning_rate": 7.707707707707708e-06,
266
+ "loss": 1.2623,
267
  "step": 340
268
  },
269
  {
270
  "epoch": 6.01,
271
+ "learning_rate": 7.607607607607608e-06,
272
+ "loss": 1.1608,
273
  "step": 350
274
  },
275
  {
276
+ "epoch": 6.02,
277
+ "learning_rate": 7.507507507507507e-06,
278
+ "loss": 1.1801,
279
  "step": 360
280
  },
281
  {
282
+ "epoch": 6.03,
283
+ "learning_rate": 7.4074074074074075e-06,
284
+ "loss": 1.0873,
285
  "step": 370
286
  },
287
  {
288
+ "epoch": 6.04,
289
+ "learning_rate": 7.307307307307308e-06,
290
+ "loss": 1.0739,
291
  "step": 380
292
  },
293
  {
294
+ "epoch": 6.05,
295
+ "learning_rate": 7.207207207207208e-06,
296
+ "loss": 1.0987,
297
  "step": 390
298
  },
299
  {
300
+ "epoch": 6.05,
301
+ "eval_accuracy": 0.6129032258064516,
302
+ "eval_loss": 1.004054307937622,
303
+ "eval_runtime": 161.9536,
304
+ "eval_samples_per_second": 1.34,
305
+ "eval_steps_per_second": 0.173,
306
  "step": 392
307
  },
308
  {
309
+ "epoch": 7.01,
310
+ "learning_rate": 7.107107107107107e-06,
311
+ "loss": 1.142,
312
  "step": 400
313
  },
314
  {
315
+ "epoch": 7.02,
316
+ "learning_rate": 7.007007007007007e-06,
317
+ "loss": 1.0111,
318
  "step": 410
319
  },
320
  {
321
+ "epoch": 7.03,
322
+ "learning_rate": 6.906906906906907e-06,
323
+ "loss": 1.0603,
324
  "step": 420
325
  },
326
  {
327
+ "epoch": 7.03,
328
+ "learning_rate": 6.8068068068068075e-06,
329
+ "loss": 1.185,
330
  "step": 430
331
  },
332
  {
333
+ "epoch": 7.04,
334
+ "learning_rate": 6.706706706706707e-06,
335
+ "loss": 1.148,
336
  "step": 440
337
  },
338
  {
339
+ "epoch": 7.05,
340
+ "eval_accuracy": 0.6175115207373272,
341
+ "eval_loss": 1.0258899927139282,
342
+ "eval_runtime": 163.4987,
343
+ "eval_samples_per_second": 1.327,
344
+ "eval_steps_per_second": 0.171,
345
  "step": 448
346
  },
347
  {
348
  "epoch": 8.0,
349
+ "learning_rate": 6.606606606606607e-06,
350
+ "loss": 1.1361,
351
  "step": 450
352
  },
353
  {
354
+ "epoch": 8.01,
355
+ "learning_rate": 6.506506506506507e-06,
356
+ "loss": 0.9612,
357
  "step": 460
358
  },
359
  {
360
+ "epoch": 8.02,
361
+ "learning_rate": 6.406406406406407e-06,
362
+ "loss": 1.0823,
363
  "step": 470
364
  },
365
  {
366
+ "epoch": 8.03,
367
+ "learning_rate": 6.3063063063063065e-06,
368
+ "loss": 1.21,
369
  "step": 480
370
  },
371
  {
372
+ "epoch": 8.04,
373
+ "learning_rate": 6.206206206206207e-06,
374
+ "loss": 0.9171,
375
  "step": 490
376
  },
377
  {
378
+ "epoch": 8.05,
379
+ "learning_rate": 6.106106106106107e-06,
380
+ "loss": 0.9958,
381
  "step": 500
382
  },
383
  {
384
+ "epoch": 8.05,
385
+ "eval_accuracy": 0.6728110599078341,
386
+ "eval_loss": 0.9507986307144165,
387
+ "eval_runtime": 161.9408,
388
+ "eval_samples_per_second": 1.34,
389
+ "eval_steps_per_second": 0.173,
390
  "step": 504
391
  },
392
  {
393
+ "epoch": 9.01,
394
+ "learning_rate": 6.006006006006007e-06,
395
+ "loss": 0.9985,
396
  "step": 510
397
  },
398
  {
399
  "epoch": 9.01,
400
+ "learning_rate": 5.905905905905906e-06,
401
+ "loss": 1.1594,
402
  "step": 520
403
  },
404
  {
405
+ "epoch": 9.02,
406
+ "learning_rate": 5.805805805805806e-06,
407
+ "loss": 1.0034,
408
  "step": 530
409
  },
410
  {
411
+ "epoch": 9.03,
412
+ "learning_rate": 5.7057057057057065e-06,
413
+ "loss": 0.9761,
414
  "step": 540
415
  },
416
  {
417
+ "epoch": 9.04,
418
+ "learning_rate": 5.605605605605607e-06,
419
+ "loss": 0.8942,
420
  "step": 550
421
  },
422
  {
423
+ "epoch": 9.05,
424
+ "learning_rate": 5.505505505505506e-06,
425
+ "loss": 1.0856,
426
  "step": 560
427
  },
428
  {
429
+ "epoch": 9.05,
430
+ "eval_accuracy": 0.5944700460829493,
431
+ "eval_loss": 1.0041232109069824,
432
+ "eval_runtime": 162.8782,
433
+ "eval_samples_per_second": 1.332,
434
+ "eval_steps_per_second": 0.172,
435
  "step": 560
436
  },
437
  {
438
+ "epoch": 10.01,
439
+ "learning_rate": 5.405405405405406e-06,
440
+ "loss": 0.972,
441
  "step": 570
442
  },
443
  {
444
+ "epoch": 10.02,
445
+ "learning_rate": 5.305305305305306e-06,
446
+ "loss": 1.0112,
447
  "step": 580
448
  },
449
  {
450
+ "epoch": 10.03,
451
+ "learning_rate": 5.205205205205206e-06,
452
+ "loss": 0.9801,
453
  "step": 590
454
  },
455
  {
456
+ "epoch": 10.04,
457
+ "learning_rate": 5.105105105105106e-06,
458
+ "loss": 0.8615,
459
  "step": 600
460
  },
461
  {
462
+ "epoch": 10.05,
463
+ "learning_rate": 5.005005005005006e-06,
464
+ "loss": 0.8809,
465
  "step": 610
466
  },
467
  {
468
+ "epoch": 10.05,
469
+ "eval_accuracy": 0.6359447004608295,
470
+ "eval_loss": 0.9638356566429138,
471
+ "eval_runtime": 161.5478,
472
+ "eval_samples_per_second": 1.343,
473
+ "eval_steps_per_second": 0.173,
474
  "step": 616
475
  },
476
  {
477
  "epoch": 11.0,
478
+ "learning_rate": 4.904904904904905e-06,
479
+ "loss": 0.8911,
480
  "step": 620
481
  },
482
  {
483
  "epoch": 11.01,
484
+ "learning_rate": 4.804804804804805e-06,
485
+ "loss": 0.8545,
486
  "step": 630
487
  },
488
  {
489
+ "epoch": 11.02,
490
+ "learning_rate": 4.704704704704705e-06,
491
+ "loss": 0.8887,
492
  "step": 640
493
  },
494
  {
495
+ "epoch": 11.03,
496
+ "learning_rate": 4.604604604604605e-06,
497
+ "loss": 0.8687,
498
  "step": 650
499
  },
500
  {
501
+ "epoch": 11.04,
502
+ "learning_rate": 4.504504504504505e-06,
503
+ "loss": 0.8959,
504
  "step": 660
505
  },
506
  {
507
+ "epoch": 11.05,
508
+ "learning_rate": 4.404404404404405e-06,
509
+ "loss": 0.9185,
510
  "step": 670
511
  },
512
  {
513
+ "epoch": 11.05,
514
+ "eval_accuracy": 0.6820276497695853,
515
+ "eval_loss": 0.9248147010803223,
516
+ "eval_runtime": 161.5706,
517
+ "eval_samples_per_second": 1.343,
518
+ "eval_steps_per_second": 0.173,
519
  "step": 672
520
  },
521
  {
522
+ "epoch": 12.01,
523
+ "learning_rate": 4.304304304304305e-06,
524
+ "loss": 0.9707,
525
  "step": 680
526
  },
527
  {
528
+ "epoch": 12.02,
529
+ "learning_rate": 4.204204204204204e-06,
530
+ "loss": 0.7837,
531
  "step": 690
532
  },
533
  {
534
+ "epoch": 12.03,
535
+ "learning_rate": 4.1041041041041045e-06,
536
+ "loss": 0.9325,
537
  "step": 700
538
  },
539
  {
540
+ "epoch": 12.03,
541
+ "learning_rate": 4.004004004004005e-06,
542
+ "loss": 0.8425,
543
  "step": 710
544
  },
545
  {
546
+ "epoch": 12.04,
547
+ "learning_rate": 3.903903903903904e-06,
548
+ "loss": 0.9136,
549
  "step": 720
550
  },
551
  {
552
+ "epoch": 12.05,
553
+ "eval_accuracy": 0.6728110599078341,
554
+ "eval_loss": 1.0136313438415527,
555
+ "eval_runtime": 162.7529,
556
+ "eval_samples_per_second": 1.333,
557
+ "eval_steps_per_second": 0.172,
558
  "step": 728
559
  },
560
  {
561
  "epoch": 13.0,
562
+ "learning_rate": 3.803803803803804e-06,
563
+ "loss": 0.9652,
564
  "step": 730
565
  },
566
  {
567
+ "epoch": 13.01,
568
+ "learning_rate": 3.7037037037037037e-06,
569
+ "loss": 0.8421,
570
  "step": 740
571
  },
572
  {
573
+ "epoch": 13.02,
574
+ "learning_rate": 3.603603603603604e-06,
575
+ "loss": 0.9862,
576
  "step": 750
577
  },
578
  {
579
+ "epoch": 13.03,
580
+ "learning_rate": 3.5035035035035036e-06,
581
+ "loss": 0.9638,
582
  "step": 760
583
  },
584
  {
585
+ "epoch": 13.04,
586
+ "learning_rate": 3.4034034034034037e-06,
587
+ "loss": 0.8826,
588
  "step": 770
589
  },
590
  {
591
+ "epoch": 13.05,
592
+ "learning_rate": 3.3033033033033035e-06,
593
+ "loss": 0.8537,
594
  "step": 780
595
  },
596
  {
597
+ "epoch": 13.05,
598
+ "eval_accuracy": 0.7188940092165899,
599
+ "eval_loss": 0.8515194058418274,
600
+ "eval_runtime": 164.1283,
601
  "eval_samples_per_second": 1.322,
602
  "eval_steps_per_second": 0.171,
603
  "step": 784
604
  },
605
  {
606
+ "epoch": 14.01,
607
+ "learning_rate": 3.2032032032032036e-06,
608
+ "loss": 0.9158,
609
  "step": 790
610
  },
611
  {
612
  "epoch": 14.01,
613
+ "learning_rate": 3.1031031031031033e-06,
614
+ "loss": 0.8456,
615
  "step": 800
616
  },
617
  {
618
+ "epoch": 14.02,
619
+ "learning_rate": 3.0030030030030034e-06,
620
+ "loss": 0.8235,
621
  "step": 810
622
  },
623
  {
624
+ "epoch": 14.03,
625
+ "learning_rate": 2.902902902902903e-06,
626
+ "loss": 0.7452,
627
  "step": 820
628
  },
629
  {
630
+ "epoch": 14.04,
631
+ "learning_rate": 2.8028028028028033e-06,
632
+ "loss": 0.8251,
633
  "step": 830
634
  },
635
  {
636
+ "epoch": 14.05,
637
+ "learning_rate": 2.702702702702703e-06,
638
+ "loss": 0.7921,
639
  "step": 840
640
  },
641
  {
642
+ "epoch": 14.05,
643
+ "eval_accuracy": 0.7004608294930875,
644
+ "eval_loss": 0.8222436308860779,
645
+ "eval_runtime": 161.8478,
646
+ "eval_samples_per_second": 1.341,
647
+ "eval_steps_per_second": 0.173,
648
  "step": 840
649
  },
650
  {
651
+ "epoch": 15.01,
652
+ "learning_rate": 2.602602602602603e-06,
653
+ "loss": 0.9356,
654
  "step": 850
655
  },
656
  {
657
+ "epoch": 15.02,
658
+ "learning_rate": 2.502502502502503e-06,
659
+ "loss": 0.8556,
660
  "step": 860
661
  },
662
  {
663
+ "epoch": 15.03,
664
+ "learning_rate": 2.4024024024024026e-06,
665
+ "loss": 0.7946,
666
  "step": 870
667
  },
668
  {
669
+ "epoch": 15.04,
670
+ "learning_rate": 2.3023023023023023e-06,
671
+ "loss": 0.7411,
672
  "step": 880
673
  },
674
  {
675
+ "epoch": 15.05,
676
+ "learning_rate": 2.2022022022022024e-06,
677
+ "loss": 0.7313,
678
  "step": 890
679
  },
680
  {
681
+ "epoch": 15.05,
682
+ "eval_accuracy": 0.7419354838709677,
683
+ "eval_loss": 0.7512253522872925,
684
+ "eval_runtime": 161.7824,
685
+ "eval_samples_per_second": 1.341,
686
+ "eval_steps_per_second": 0.173,
687
  "step": 896
688
  },
689
  {
690
  "epoch": 16.0,
691
+ "learning_rate": 2.102102102102102e-06,
692
+ "loss": 0.8169,
693
  "step": 900
694
  },
695
  {
696
  "epoch": 16.01,
697
+ "learning_rate": 2.0020020020020023e-06,
698
+ "loss": 0.7236,
699
  "step": 910
700
  },
701
  {
702
+ "epoch": 16.02,
703
+ "learning_rate": 1.901901901901902e-06,
704
+ "loss": 0.8335,
705
  "step": 920
706
  },
707
  {
708
+ "epoch": 16.03,
709
+ "learning_rate": 1.801801801801802e-06,
710
+ "loss": 0.8666,
711
  "step": 930
712
  },
713
  {
714
+ "epoch": 16.04,
715
+ "learning_rate": 1.7017017017017019e-06,
716
+ "loss": 0.8429,
717
  "step": 940
718
  },
719
  {
720
+ "epoch": 16.05,
721
+ "learning_rate": 1.6016016016016018e-06,
722
+ "loss": 0.5998,
723
  "step": 950
724
  },
725
  {
726
+ "epoch": 16.05,
727
+ "eval_accuracy": 0.6129032258064516,
728
+ "eval_loss": 0.9409515857696533,
729
+ "eval_runtime": 161.5869,
730
+ "eval_samples_per_second": 1.343,
731
+ "eval_steps_per_second": 0.173,
732
  "step": 952
733
  },
734
  {
735
+ "epoch": 17.01,
736
+ "learning_rate": 1.5015015015015017e-06,
737
+ "loss": 0.8206,
738
  "step": 960
739
  },
740
  {
741
+ "epoch": 17.02,
742
+ "learning_rate": 1.4014014014014016e-06,
743
+ "loss": 0.7474,
744
  "step": 970
745
  },
746
  {
747
+ "epoch": 17.03,
748
+ "learning_rate": 1.3013013013013016e-06,
749
+ "loss": 0.7778,
750
  "step": 980
751
  },
752
  {
753
+ "epoch": 17.03,
754
+ "learning_rate": 1.2012012012012013e-06,
755
+ "loss": 0.7003,
756
  "step": 990
757
  },
758
  {
759
+ "epoch": 17.04,
760
+ "learning_rate": 1.1011011011011012e-06,
761
+ "loss": 0.8093,
762
  "step": 1000
763
  },
764
  {
765
+ "epoch": 17.05,
766
+ "eval_accuracy": 0.7050691244239631,
767
+ "eval_loss": 0.8144727349281311,
768
+ "eval_runtime": 161.5502,
769
+ "eval_samples_per_second": 1.343,
770
+ "eval_steps_per_second": 0.173,
771
  "step": 1008
772
  },
773
  {
774
  "epoch": 18.0,
775
+ "learning_rate": 1.0010010010010011e-06,
776
+ "loss": 0.8227,
777
  "step": 1010
778
  },
779
  {
780
+ "epoch": 18.01,
781
+ "learning_rate": 9.00900900900901e-07,
782
+ "loss": 0.8949,
783
  "step": 1020
784
  },
785
  {
786
+ "epoch": 18.02,
787
+ "learning_rate": 8.008008008008009e-07,
788
+ "loss": 0.6573,
789
  "step": 1030
790
  },
791
  {
792
+ "epoch": 18.03,
793
+ "learning_rate": 7.007007007007008e-07,
794
+ "loss": 0.8836,
795
  "step": 1040
796
  },
797
  {
798
+ "epoch": 18.04,
799
+ "learning_rate": 6.006006006006006e-07,
800
+ "loss": 0.7605,
801
  "step": 1050
802
  },
803
  {
804
+ "epoch": 18.05,
805
+ "learning_rate": 5.005005005005006e-07,
806
+ "loss": 0.604,
807
  "step": 1060
808
  },
809
  {
810
+ "epoch": 18.05,
811
+ "eval_accuracy": 0.6820276497695853,
812
+ "eval_loss": 0.9013708233833313,
813
+ "eval_runtime": 162.7504,
814
+ "eval_samples_per_second": 1.333,
815
+ "eval_steps_per_second": 0.172,
816
  "step": 1064
817
  },
818
  {
819
+ "epoch": 19.01,
820
+ "learning_rate": 4.0040040040040045e-07,
821
+ "loss": 0.7955,
822
  "step": 1070
823
  },
824
  {
825
  "epoch": 19.01,
826
+ "learning_rate": 3.003003003003003e-07,
827
+ "loss": 0.9212,
828
  "step": 1080
829
  },
830
  {
831
+ "epoch": 19.02,
832
+ "learning_rate": 2.0020020020020022e-07,
833
+ "loss": 0.9198,
834
  "step": 1090
835
  },
836
  {
837
+ "epoch": 19.03,
838
+ "learning_rate": 1.0010010010010011e-07,
839
+ "loss": 0.5744,
840
  "step": 1100
841
  },
842
  {
843
+ "epoch": 19.04,
844
+ "learning_rate": 0.0,
845
+ "loss": 0.7066,
846
  "step": 1110
847
  },
848
  {
849
+ "epoch": 19.04,
850
+ "eval_accuracy": 0.6682027649769585,
851
+ "eval_loss": 0.893532931804657,
852
+ "eval_runtime": 167.9329,
853
+ "eval_samples_per_second": 1.292,
854
+ "eval_steps_per_second": 0.167,
855
+ "step": 1110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
856
  },
857
  {
858
+ "epoch": 19.04,
859
+ "step": 1110,
860
+ "total_flos": 1.1041673732998595e+19,
861
+ "train_loss": 1.0829452497465117,
862
+ "train_runtime": 11433.1912,
863
+ "train_samples_per_second": 0.777,
864
+ "train_steps_per_second": 0.097
865
  },
866
  {
867
+ "epoch": 19.04,
868
+ "eval_accuracy": 0.6666666666666666,
869
+ "eval_loss": 0.6926783919334412,
870
+ "eval_runtime": 164.6443,
871
  "eval_samples_per_second": 1.312,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
872
  "eval_steps_per_second": 0.164,
873
+ "step": 1110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
874
  },
875
  {
876
+ "epoch": 19.04,
877
+ "eval_accuracy": 0.6666666666666666,
878
+ "eval_loss": 0.6926783919334412,
879
+ "eval_runtime": 163.2396,
880
+ "eval_samples_per_second": 1.323,
881
+ "eval_steps_per_second": 0.165,
882
+ "step": 1110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
883
  }
884
  ],
885
  "logging_steps": 10,
886
+ "max_steps": 1110,
887
  "num_input_tokens_seen": 0,
888
  "num_train_epochs": 9223372036854775807,
889
  "save_steps": 500,
890
+ "total_flos": 1.1041673732998595e+19,
891
  "train_batch_size": 8,
892
  "trial_name": null,
893
  "trial_params": null