bliebfl commited on
Commit
e4858b5
1 Parent(s): 25861c0

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 9.98,
3
- "eval_accuracy": 0.9405358686257562,
4
- "eval_loss": 0.3180586099624634,
5
- "eval_runtime": 38.4742,
6
- "eval_samples_per_second": 150.361,
7
- "eval_steps_per_second": 4.704,
8
  "total_flos": 1.3140619208067262e+19,
9
- "train_loss": 1.0548507860728673,
10
- "train_runtime": 4291.8856,
11
- "train_samples_per_second": 121.301,
12
- "train_steps_per_second": 0.946
13
  }
 
1
  {
2
  "epoch": 9.98,
3
+ "eval_accuracy": 0.9676750216076059,
4
+ "eval_loss": 0.17719660699367523,
5
+ "eval_runtime": 39.5245,
6
+ "eval_samples_per_second": 146.365,
7
+ "eval_steps_per_second": 4.579,
8
  "total_flos": 1.3140619208067262e+19,
9
+ "train_loss": 0.6519044913681857,
10
+ "train_runtime": 4257.1599,
11
+ "train_samples_per_second": 122.29,
12
+ "train_steps_per_second": 0.954
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.98,
3
- "eval_accuracy": 0.9405358686257562,
4
- "eval_loss": 0.3180586099624634,
5
- "eval_runtime": 38.4742,
6
- "eval_samples_per_second": 150.361,
7
- "eval_steps_per_second": 4.704
8
  }
 
1
  {
2
  "epoch": 9.98,
3
+ "eval_accuracy": 0.9676750216076059,
4
+ "eval_loss": 0.17719660699367523,
5
+ "eval_runtime": 39.5245,
6
+ "eval_samples_per_second": 146.365,
7
+ "eval_steps_per_second": 4.579
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98a51a57fc178dbdb002a5ebb4e723eb886ab624410b0670278008dde8af634d
3
  size 112253100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83a41aa2fe7fb29377d5a39f74ecb8c77f954f774989e488dc7341910f1d415f
3
  size 112253100
runs/Dec14_22-06-10_bc148a6f3d4e/events.out.tfevents.1702600361.bc148a6f3d4e.163484.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32b4e4b7659b50fe66b8615b452a3a76ff415578879db6cf25f51dbaf082cfe0
3
+ size 36401
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.98,
3
  "total_flos": 1.3140619208067262e+19,
4
- "train_loss": 1.0548507860728673,
5
- "train_runtime": 4291.8856,
6
- "train_samples_per_second": 121.301,
7
- "train_steps_per_second": 0.946
8
  }
 
1
  {
2
  "epoch": 9.98,
3
  "total_flos": 1.3140619208067262e+19,
4
+ "train_loss": 0.6519044913681857,
5
+ "train_runtime": 4257.1599,
6
+ "train_samples_per_second": 122.29,
7
+ "train_steps_per_second": 0.954
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9405358686257562,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-3660",
4
  "epoch": 9.981561155500922,
5
  "eval_steps": 500,
6
  "global_step": 4060,
@@ -11,2537 +11,2537 @@
11
  {
12
  "epoch": 0.02,
13
  "learning_rate": 1.2315270935960593e-06,
14
- "loss": 1.8272,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.05,
19
  "learning_rate": 2.4630541871921186e-06,
20
- "loss": 1.7835,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.07,
25
  "learning_rate": 3.6945812807881777e-06,
26
- "loss": 1.751,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.1,
31
  "learning_rate": 4.926108374384237e-06,
32
- "loss": 1.7539,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.12,
37
  "learning_rate": 6.157635467980296e-06,
38
- "loss": 1.7125,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.15,
43
  "learning_rate": 7.3891625615763555e-06,
44
- "loss": 1.6708,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.17,
49
  "learning_rate": 8.620689655172414e-06,
50
- "loss": 1.7727,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.2,
55
  "learning_rate": 9.852216748768475e-06,
56
- "loss": 1.6377,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.22,
61
  "learning_rate": 1.1083743842364533e-05,
62
- "loss": 1.8024,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.25,
67
  "learning_rate": 1.2315270935960592e-05,
68
- "loss": 1.7476,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.27,
73
  "learning_rate": 1.354679802955665e-05,
74
- "loss": 1.6509,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.3,
79
  "learning_rate": 1.4778325123152711e-05,
80
- "loss": 1.7052,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.32,
85
  "learning_rate": 1.6009852216748768e-05,
86
- "loss": 1.7003,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.34,
91
  "learning_rate": 1.7241379310344828e-05,
92
- "loss": 1.7424,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.37,
97
  "learning_rate": 1.847290640394089e-05,
98
- "loss": 1.7223,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.39,
103
  "learning_rate": 1.970443349753695e-05,
104
- "loss": 1.6932,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.42,
109
  "learning_rate": 2.0935960591133006e-05,
110
- "loss": 1.6393,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.44,
115
  "learning_rate": 2.2167487684729066e-05,
116
- "loss": 1.7023,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.47,
121
  "learning_rate": 2.3399014778325123e-05,
122
- "loss": 1.6219,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.49,
127
  "learning_rate": 2.4630541871921184e-05,
128
- "loss": 1.7031,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.52,
133
  "learning_rate": 2.5862068965517244e-05,
134
- "loss": 1.7309,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.54,
139
  "learning_rate": 2.70935960591133e-05,
140
- "loss": 1.7251,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 0.57,
145
  "learning_rate": 2.8325123152709358e-05,
146
- "loss": 1.7039,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 0.59,
151
  "learning_rate": 2.9556650246305422e-05,
152
- "loss": 1.6753,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 0.61,
157
  "learning_rate": 3.078817733990148e-05,
158
- "loss": 1.7149,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 0.64,
163
  "learning_rate": 3.2019704433497536e-05,
164
- "loss": 1.7101,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 0.66,
169
  "learning_rate": 3.3251231527093596e-05,
170
- "loss": 1.6208,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 0.69,
175
  "learning_rate": 3.4482758620689657e-05,
176
- "loss": 1.5855,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 0.71,
181
  "learning_rate": 3.571428571428572e-05,
182
- "loss": 1.741,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 0.74,
187
  "learning_rate": 3.694581280788178e-05,
188
- "loss": 1.6925,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 0.76,
193
  "learning_rate": 3.817733990147783e-05,
194
- "loss": 1.6607,
195
  "step": 310
196
  },
197
  {
198
  "epoch": 0.79,
199
  "learning_rate": 3.94088669950739e-05,
200
- "loss": 1.5157,
201
  "step": 320
202
  },
203
  {
204
  "epoch": 0.81,
205
  "learning_rate": 4.064039408866995e-05,
206
- "loss": 1.5652,
207
  "step": 330
208
  },
209
  {
210
  "epoch": 0.84,
211
  "learning_rate": 4.187192118226601e-05,
212
- "loss": 1.6547,
213
  "step": 340
214
  },
215
  {
216
  "epoch": 0.86,
217
  "learning_rate": 4.3103448275862066e-05,
218
- "loss": 1.5618,
219
  "step": 350
220
  },
221
  {
222
  "epoch": 0.89,
223
  "learning_rate": 4.433497536945813e-05,
224
- "loss": 1.6555,
225
  "step": 360
226
  },
227
  {
228
  "epoch": 0.91,
229
  "learning_rate": 4.5566502463054186e-05,
230
- "loss": 1.5538,
231
  "step": 370
232
  },
233
  {
234
  "epoch": 0.93,
235
  "learning_rate": 4.679802955665025e-05,
236
- "loss": 1.5496,
237
  "step": 380
238
  },
239
  {
240
  "epoch": 0.96,
241
  "learning_rate": 4.802955665024631e-05,
242
- "loss": 1.5248,
243
  "step": 390
244
  },
245
  {
246
  "epoch": 0.98,
247
  "learning_rate": 4.926108374384237e-05,
248
- "loss": 1.5645,
249
  "step": 400
250
  },
251
  {
252
  "epoch": 1.0,
253
- "eval_accuracy": 0.7904926534140018,
254
- "eval_loss": 1.0879656076431274,
255
- "eval_runtime": 27.6968,
256
- "eval_samples_per_second": 208.869,
257
- "eval_steps_per_second": 6.535,
258
  "step": 406
259
  },
260
  {
261
  "epoch": 1.01,
262
  "learning_rate": 4.9945265462506846e-05,
263
- "loss": 1.5933,
264
  "step": 410
265
  },
266
  {
267
  "epoch": 1.03,
268
  "learning_rate": 4.980842911877395e-05,
269
- "loss": 1.5284,
270
  "step": 420
271
  },
272
  {
273
  "epoch": 1.06,
274
  "learning_rate": 4.9671592775041054e-05,
275
- "loss": 1.5239,
276
  "step": 430
277
  },
278
  {
279
  "epoch": 1.08,
280
  "learning_rate": 4.953475643130816e-05,
281
- "loss": 1.4781,
282
  "step": 440
283
  },
284
  {
285
  "epoch": 1.11,
286
  "learning_rate": 4.939792008757526e-05,
287
- "loss": 1.468,
288
  "step": 450
289
  },
290
  {
291
  "epoch": 1.13,
292
  "learning_rate": 4.926108374384237e-05,
293
- "loss": 1.5165,
294
  "step": 460
295
  },
296
  {
297
  "epoch": 1.16,
298
  "learning_rate": 4.912424740010947e-05,
299
- "loss": 1.5641,
300
  "step": 470
301
  },
302
  {
303
  "epoch": 1.18,
304
  "learning_rate": 4.8987411056376576e-05,
305
- "loss": 1.53,
306
  "step": 480
307
  },
308
  {
309
  "epoch": 1.2,
310
  "learning_rate": 4.885057471264368e-05,
311
- "loss": 1.4946,
312
  "step": 490
313
  },
314
  {
315
  "epoch": 1.23,
316
  "learning_rate": 4.8713738368910785e-05,
317
- "loss": 1.5378,
318
  "step": 500
319
  },
320
  {
321
  "epoch": 1.25,
322
  "learning_rate": 4.857690202517789e-05,
323
- "loss": 1.4228,
324
  "step": 510
325
  },
326
  {
327
  "epoch": 1.28,
328
  "learning_rate": 4.8440065681444994e-05,
329
- "loss": 1.4621,
330
  "step": 520
331
  },
332
  {
333
  "epoch": 1.3,
334
  "learning_rate": 4.83032293377121e-05,
335
- "loss": 1.5405,
336
  "step": 530
337
  },
338
  {
339
  "epoch": 1.33,
340
  "learning_rate": 4.81663929939792e-05,
341
- "loss": 1.4338,
342
  "step": 540
343
  },
344
  {
345
  "epoch": 1.35,
346
  "learning_rate": 4.802955665024631e-05,
347
- "loss": 1.3429,
348
  "step": 550
349
  },
350
  {
351
  "epoch": 1.38,
352
  "learning_rate": 4.789272030651341e-05,
353
- "loss": 1.4912,
354
  "step": 560
355
  },
356
  {
357
  "epoch": 1.4,
358
  "learning_rate": 4.7755883962780516e-05,
359
- "loss": 1.4859,
360
  "step": 570
361
  },
362
  {
363
  "epoch": 1.43,
364
  "learning_rate": 4.761904761904762e-05,
365
- "loss": 1.3696,
366
  "step": 580
367
  },
368
  {
369
  "epoch": 1.45,
370
  "learning_rate": 4.7482211275314725e-05,
371
- "loss": 1.3818,
372
  "step": 590
373
  },
374
  {
375
  "epoch": 1.48,
376
  "learning_rate": 4.734537493158183e-05,
377
- "loss": 1.395,
378
  "step": 600
379
  },
380
  {
381
  "epoch": 1.5,
382
  "learning_rate": 4.7208538587848934e-05,
383
- "loss": 1.5301,
384
  "step": 610
385
  },
386
  {
387
  "epoch": 1.52,
388
  "learning_rate": 4.707170224411604e-05,
389
- "loss": 1.3008,
390
  "step": 620
391
  },
392
  {
393
  "epoch": 1.55,
394
  "learning_rate": 4.693486590038315e-05,
395
- "loss": 1.3089,
396
  "step": 630
397
  },
398
  {
399
  "epoch": 1.57,
400
  "learning_rate": 4.679802955665025e-05,
401
- "loss": 1.4767,
402
  "step": 640
403
  },
404
  {
405
  "epoch": 1.6,
406
  "learning_rate": 4.666119321291735e-05,
407
- "loss": 1.3715,
408
  "step": 650
409
  },
410
  {
411
  "epoch": 1.62,
412
  "learning_rate": 4.652435686918446e-05,
413
- "loss": 1.3359,
414
  "step": 660
415
  },
416
  {
417
  "epoch": 1.65,
418
  "learning_rate": 4.638752052545156e-05,
419
- "loss": 1.38,
420
  "step": 670
421
  },
422
  {
423
  "epoch": 1.67,
424
  "learning_rate": 4.6250684181718664e-05,
425
- "loss": 1.4252,
426
  "step": 680
427
  },
428
  {
429
  "epoch": 1.7,
430
  "learning_rate": 4.611384783798577e-05,
431
- "loss": 1.2751,
432
  "step": 690
433
  },
434
  {
435
  "epoch": 1.72,
436
  "learning_rate": 4.597701149425287e-05,
437
- "loss": 1.4558,
438
  "step": 700
439
  },
440
  {
441
  "epoch": 1.75,
442
  "learning_rate": 4.5840175150519984e-05,
443
- "loss": 1.3744,
444
  "step": 710
445
  },
446
  {
447
  "epoch": 1.77,
448
  "learning_rate": 4.570333880678708e-05,
449
- "loss": 1.3676,
450
  "step": 720
451
  },
452
  {
453
  "epoch": 1.79,
454
  "learning_rate": 4.5566502463054186e-05,
455
- "loss": 1.3031,
456
  "step": 730
457
  },
458
  {
459
  "epoch": 1.82,
460
  "learning_rate": 4.54296661193213e-05,
461
- "loss": 1.346,
462
  "step": 740
463
  },
464
  {
465
  "epoch": 1.84,
466
  "learning_rate": 4.5292829775588395e-05,
467
- "loss": 1.337,
468
  "step": 750
469
  },
470
  {
471
  "epoch": 1.87,
472
  "learning_rate": 4.5155993431855506e-05,
473
- "loss": 1.318,
474
  "step": 760
475
  },
476
  {
477
  "epoch": 1.89,
478
  "learning_rate": 4.501915708812261e-05,
479
- "loss": 1.4677,
480
  "step": 770
481
  },
482
  {
483
  "epoch": 1.92,
484
  "learning_rate": 4.488232074438971e-05,
485
- "loss": 1.2769,
486
  "step": 780
487
  },
488
  {
489
  "epoch": 1.94,
490
  "learning_rate": 4.474548440065682e-05,
491
- "loss": 1.2428,
492
  "step": 790
493
  },
494
  {
495
  "epoch": 1.97,
496
  "learning_rate": 4.460864805692392e-05,
497
- "loss": 1.2961,
498
  "step": 800
499
  },
500
  {
501
  "epoch": 1.99,
502
  "learning_rate": 4.447181171319103e-05,
503
- "loss": 1.3126,
504
  "step": 810
505
  },
506
  {
507
  "epoch": 2.0,
508
- "eval_accuracy": 0.8399308556611927,
509
- "eval_loss": 0.7987398505210876,
510
- "eval_runtime": 27.7317,
511
- "eval_samples_per_second": 208.606,
512
- "eval_steps_per_second": 6.527,
513
  "step": 813
514
  },
515
  {
516
  "epoch": 2.02,
517
  "learning_rate": 4.433497536945813e-05,
518
- "loss": 1.271,
519
  "step": 820
520
  },
521
  {
522
  "epoch": 2.04,
523
  "learning_rate": 4.419813902572523e-05,
524
- "loss": 1.3539,
525
  "step": 830
526
  },
527
  {
528
  "epoch": 2.07,
529
  "learning_rate": 4.406130268199234e-05,
530
- "loss": 1.2938,
531
  "step": 840
532
  },
533
  {
534
  "epoch": 2.09,
535
  "learning_rate": 4.3924466338259446e-05,
536
- "loss": 1.2817,
537
  "step": 850
538
  },
539
  {
540
  "epoch": 2.11,
541
  "learning_rate": 4.3787629994526544e-05,
542
- "loss": 1.2808,
543
  "step": 860
544
  },
545
  {
546
  "epoch": 2.14,
547
  "learning_rate": 4.3650793650793655e-05,
548
- "loss": 1.2517,
549
  "step": 870
550
  },
551
  {
552
  "epoch": 2.16,
553
  "learning_rate": 4.351395730706076e-05,
554
- "loss": 1.2371,
555
  "step": 880
556
  },
557
  {
558
  "epoch": 2.19,
559
  "learning_rate": 4.3377120963327864e-05,
560
- "loss": 1.277,
561
  "step": 890
562
  },
563
  {
564
  "epoch": 2.21,
565
  "learning_rate": 4.324028461959497e-05,
566
- "loss": 1.2152,
567
  "step": 900
568
  },
569
  {
570
  "epoch": 2.24,
571
  "learning_rate": 4.3103448275862066e-05,
572
- "loss": 1.1733,
573
  "step": 910
574
  },
575
  {
576
  "epoch": 2.26,
577
  "learning_rate": 4.296661193212918e-05,
578
- "loss": 1.3563,
579
  "step": 920
580
  },
581
  {
582
  "epoch": 2.29,
583
  "learning_rate": 4.282977558839628e-05,
584
- "loss": 1.4114,
585
  "step": 930
586
  },
587
  {
588
  "epoch": 2.31,
589
  "learning_rate": 4.2692939244663386e-05,
590
- "loss": 1.1794,
591
  "step": 940
592
  },
593
  {
594
  "epoch": 2.34,
595
  "learning_rate": 4.255610290093049e-05,
596
- "loss": 1.2454,
597
  "step": 950
598
  },
599
  {
600
  "epoch": 2.36,
601
  "learning_rate": 4.2419266557197594e-05,
602
- "loss": 1.2245,
603
  "step": 960
604
  },
605
  {
606
  "epoch": 2.38,
607
  "learning_rate": 4.22824302134647e-05,
608
- "loss": 1.2197,
609
  "step": 970
610
  },
611
  {
612
  "epoch": 2.41,
613
  "learning_rate": 4.21455938697318e-05,
614
- "loss": 1.3025,
615
  "step": 980
616
  },
617
  {
618
  "epoch": 2.43,
619
  "learning_rate": 4.200875752599891e-05,
620
- "loss": 1.1717,
621
  "step": 990
622
  },
623
  {
624
  "epoch": 2.46,
625
  "learning_rate": 4.187192118226601e-05,
626
- "loss": 1.2077,
627
  "step": 1000
628
  },
629
  {
630
  "epoch": 2.48,
631
  "learning_rate": 4.1735084838533116e-05,
632
- "loss": 1.3137,
633
  "step": 1010
634
  },
635
  {
636
  "epoch": 2.51,
637
  "learning_rate": 4.159824849480022e-05,
638
- "loss": 1.2348,
639
  "step": 1020
640
  },
641
  {
642
  "epoch": 2.53,
643
  "learning_rate": 4.1461412151067325e-05,
644
- "loss": 1.2611,
645
  "step": 1030
646
  },
647
  {
648
  "epoch": 2.56,
649
  "learning_rate": 4.132457580733443e-05,
650
- "loss": 1.0912,
651
  "step": 1040
652
  },
653
  {
654
  "epoch": 2.58,
655
  "learning_rate": 4.1187739463601534e-05,
656
- "loss": 1.3145,
657
  "step": 1050
658
  },
659
  {
660
  "epoch": 2.61,
661
  "learning_rate": 4.105090311986864e-05,
662
- "loss": 1.2088,
663
  "step": 1060
664
  },
665
  {
666
  "epoch": 2.63,
667
  "learning_rate": 4.091406677613574e-05,
668
- "loss": 1.1283,
669
  "step": 1070
670
  },
671
  {
672
  "epoch": 2.66,
673
  "learning_rate": 4.077723043240285e-05,
674
- "loss": 1.1388,
675
  "step": 1080
676
  },
677
  {
678
  "epoch": 2.68,
679
  "learning_rate": 4.064039408866995e-05,
680
- "loss": 1.2748,
681
  "step": 1090
682
  },
683
  {
684
  "epoch": 2.7,
685
  "learning_rate": 4.050355774493706e-05,
686
- "loss": 1.0486,
687
  "step": 1100
688
  },
689
  {
690
  "epoch": 2.73,
691
  "learning_rate": 4.036672140120416e-05,
692
- "loss": 1.1514,
693
  "step": 1110
694
  },
695
  {
696
  "epoch": 2.75,
697
  "learning_rate": 4.0229885057471265e-05,
698
- "loss": 1.2419,
699
  "step": 1120
700
  },
701
  {
702
  "epoch": 2.78,
703
  "learning_rate": 4.009304871373837e-05,
704
- "loss": 1.3025,
705
  "step": 1130
706
  },
707
  {
708
  "epoch": 2.8,
709
  "learning_rate": 3.9956212370005474e-05,
710
- "loss": 1.1627,
711
  "step": 1140
712
  },
713
  {
714
  "epoch": 2.83,
715
  "learning_rate": 3.981937602627258e-05,
716
- "loss": 1.2263,
717
  "step": 1150
718
  },
719
  {
720
  "epoch": 2.85,
721
  "learning_rate": 3.968253968253968e-05,
722
- "loss": 1.1918,
723
  "step": 1160
724
  },
725
  {
726
  "epoch": 2.88,
727
  "learning_rate": 3.954570333880679e-05,
728
- "loss": 1.2337,
729
  "step": 1170
730
  },
731
  {
732
  "epoch": 2.9,
733
  "learning_rate": 3.94088669950739e-05,
734
- "loss": 1.325,
735
  "step": 1180
736
  },
737
  {
738
  "epoch": 2.93,
739
  "learning_rate": 3.9272030651340996e-05,
740
- "loss": 1.1631,
741
  "step": 1190
742
  },
743
  {
744
  "epoch": 2.95,
745
  "learning_rate": 3.91351943076081e-05,
746
- "loss": 1.2506,
747
  "step": 1200
748
  },
749
  {
750
  "epoch": 2.97,
751
  "learning_rate": 3.899835796387521e-05,
752
- "loss": 1.2831,
753
  "step": 1210
754
  },
755
  {
756
  "epoch": 3.0,
757
  "learning_rate": 3.886152162014231e-05,
758
- "loss": 1.2335,
759
  "step": 1220
760
  },
761
  {
762
  "epoch": 3.0,
763
- "eval_accuracy": 0.8741573033707866,
764
- "eval_loss": 0.6398221254348755,
765
- "eval_runtime": 27.2608,
766
- "eval_samples_per_second": 212.209,
767
- "eval_steps_per_second": 6.64,
768
  "step": 1220
769
  },
770
  {
771
  "epoch": 3.02,
772
  "learning_rate": 3.872468527640942e-05,
773
- "loss": 1.1144,
774
  "step": 1230
775
  },
776
  {
777
  "epoch": 3.05,
778
  "learning_rate": 3.858784893267652e-05,
779
- "loss": 1.1746,
780
  "step": 1240
781
  },
782
  {
783
  "epoch": 3.07,
784
  "learning_rate": 3.845101258894362e-05,
785
- "loss": 1.1363,
786
  "step": 1250
787
  },
788
  {
789
  "epoch": 3.1,
790
  "learning_rate": 3.831417624521073e-05,
791
- "loss": 1.1003,
792
  "step": 1260
793
  },
794
  {
795
  "epoch": 3.12,
796
  "learning_rate": 3.817733990147783e-05,
797
- "loss": 1.1743,
798
  "step": 1270
799
  },
800
  {
801
  "epoch": 3.15,
802
  "learning_rate": 3.8040503557744935e-05,
803
- "loss": 1.1832,
804
  "step": 1280
805
  },
806
  {
807
  "epoch": 3.17,
808
  "learning_rate": 3.7903667214012047e-05,
809
- "loss": 1.1495,
810
  "step": 1290
811
  },
812
  {
813
  "epoch": 3.2,
814
  "learning_rate": 3.7766830870279144e-05,
815
- "loss": 1.1282,
816
  "step": 1300
817
  },
818
  {
819
  "epoch": 3.22,
820
  "learning_rate": 3.7629994526546255e-05,
821
- "loss": 1.1273,
822
  "step": 1310
823
  },
824
  {
825
  "epoch": 3.25,
826
  "learning_rate": 3.749315818281336e-05,
827
- "loss": 1.0586,
828
  "step": 1320
829
  },
830
  {
831
  "epoch": 3.27,
832
  "learning_rate": 3.735632183908046e-05,
833
- "loss": 1.1353,
834
  "step": 1330
835
  },
836
  {
837
  "epoch": 3.29,
838
  "learning_rate": 3.721948549534757e-05,
839
- "loss": 1.0796,
840
  "step": 1340
841
  },
842
  {
843
  "epoch": 3.32,
844
  "learning_rate": 3.7082649151614666e-05,
845
- "loss": 1.1132,
846
  "step": 1350
847
  },
848
  {
849
  "epoch": 3.34,
850
  "learning_rate": 3.694581280788178e-05,
851
- "loss": 1.0474,
852
  "step": 1360
853
  },
854
  {
855
  "epoch": 3.37,
856
  "learning_rate": 3.680897646414888e-05,
857
- "loss": 1.1392,
858
  "step": 1370
859
  },
860
  {
861
  "epoch": 3.39,
862
  "learning_rate": 3.667214012041598e-05,
863
- "loss": 1.1334,
864
  "step": 1380
865
  },
866
  {
867
  "epoch": 3.42,
868
  "learning_rate": 3.653530377668309e-05,
869
- "loss": 1.0506,
870
  "step": 1390
871
  },
872
  {
873
  "epoch": 3.44,
874
  "learning_rate": 3.6398467432950195e-05,
875
- "loss": 1.1107,
876
  "step": 1400
877
  },
878
  {
879
  "epoch": 3.47,
880
  "learning_rate": 3.62616310892173e-05,
881
- "loss": 1.1643,
882
  "step": 1410
883
  },
884
  {
885
  "epoch": 3.49,
886
  "learning_rate": 3.6124794745484404e-05,
887
- "loss": 1.0167,
888
  "step": 1420
889
  },
890
  {
891
  "epoch": 3.52,
892
  "learning_rate": 3.598795840175151e-05,
893
- "loss": 1.2377,
894
  "step": 1430
895
  },
896
  {
897
  "epoch": 3.54,
898
  "learning_rate": 3.585112205801861e-05,
899
- "loss": 1.0392,
900
  "step": 1440
901
  },
902
  {
903
  "epoch": 3.56,
904
  "learning_rate": 3.571428571428572e-05,
905
- "loss": 1.1304,
906
  "step": 1450
907
  },
908
  {
909
  "epoch": 3.59,
910
  "learning_rate": 3.5577449370552815e-05,
911
- "loss": 1.2185,
912
  "step": 1460
913
  },
914
  {
915
  "epoch": 3.61,
916
  "learning_rate": 3.5440613026819926e-05,
917
- "loss": 1.0848,
918
  "step": 1470
919
  },
920
  {
921
  "epoch": 3.64,
922
  "learning_rate": 3.530377668308703e-05,
923
- "loss": 1.1142,
924
  "step": 1480
925
  },
926
  {
927
  "epoch": 3.66,
928
  "learning_rate": 3.5166940339354135e-05,
929
- "loss": 1.0458,
930
  "step": 1490
931
  },
932
  {
933
  "epoch": 3.69,
934
  "learning_rate": 3.503010399562124e-05,
935
- "loss": 1.041,
936
  "step": 1500
937
  },
938
  {
939
  "epoch": 3.71,
940
  "learning_rate": 3.489326765188834e-05,
941
- "loss": 1.119,
942
  "step": 1510
943
  },
944
  {
945
  "epoch": 3.74,
946
  "learning_rate": 3.475643130815545e-05,
947
- "loss": 1.0259,
948
  "step": 1520
949
  },
950
  {
951
  "epoch": 3.76,
952
  "learning_rate": 3.461959496442255e-05,
953
- "loss": 1.14,
954
  "step": 1530
955
  },
956
  {
957
  "epoch": 3.79,
958
  "learning_rate": 3.4482758620689657e-05,
959
- "loss": 0.994,
960
  "step": 1540
961
  },
962
  {
963
  "epoch": 3.81,
964
  "learning_rate": 3.434592227695676e-05,
965
- "loss": 1.011,
966
  "step": 1550
967
  },
968
  {
969
  "epoch": 3.84,
970
  "learning_rate": 3.4209085933223865e-05,
971
- "loss": 1.0745,
972
  "step": 1560
973
  },
974
  {
975
  "epoch": 3.86,
976
  "learning_rate": 3.407224958949097e-05,
977
- "loss": 1.0777,
978
  "step": 1570
979
  },
980
  {
981
  "epoch": 3.88,
982
  "learning_rate": 3.3935413245758074e-05,
983
- "loss": 1.1397,
984
  "step": 1580
985
  },
986
  {
987
  "epoch": 3.91,
988
  "learning_rate": 3.379857690202518e-05,
989
- "loss": 1.0727,
990
  "step": 1590
991
  },
992
  {
993
  "epoch": 3.93,
994
  "learning_rate": 3.366174055829228e-05,
995
- "loss": 1.0662,
996
  "step": 1600
997
  },
998
  {
999
  "epoch": 3.96,
1000
  "learning_rate": 3.352490421455939e-05,
1001
- "loss": 1.0208,
1002
  "step": 1610
1003
  },
1004
  {
1005
  "epoch": 3.98,
1006
  "learning_rate": 3.338806787082649e-05,
1007
- "loss": 1.1244,
1008
  "step": 1620
1009
  },
1010
  {
1011
  "epoch": 4.0,
1012
- "eval_accuracy": 0.9040622299049266,
1013
- "eval_loss": 0.497685045003891,
1014
- "eval_runtime": 27.8668,
1015
- "eval_samples_per_second": 207.595,
1016
- "eval_steps_per_second": 6.495,
1017
  "step": 1627
1018
  },
1019
  {
1020
  "epoch": 4.01,
1021
  "learning_rate": 3.3251231527093596e-05,
1022
- "loss": 0.9674,
1023
  "step": 1630
1024
  },
1025
  {
1026
  "epoch": 4.03,
1027
  "learning_rate": 3.31143951833607e-05,
1028
- "loss": 1.0643,
1029
  "step": 1640
1030
  },
1031
  {
1032
  "epoch": 4.06,
1033
  "learning_rate": 3.297755883962781e-05,
1034
- "loss": 1.0174,
1035
  "step": 1650
1036
  },
1037
  {
1038
  "epoch": 4.08,
1039
  "learning_rate": 3.284072249589491e-05,
1040
- "loss": 1.0732,
1041
  "step": 1660
1042
  },
1043
  {
1044
  "epoch": 4.11,
1045
  "learning_rate": 3.2703886152162014e-05,
1046
- "loss": 1.0587,
1047
  "step": 1670
1048
  },
1049
  {
1050
  "epoch": 4.13,
1051
  "learning_rate": 3.256704980842912e-05,
1052
- "loss": 1.0531,
1053
  "step": 1680
1054
  },
1055
  {
1056
  "epoch": 4.15,
1057
  "learning_rate": 3.243021346469622e-05,
1058
- "loss": 1.0491,
1059
  "step": 1690
1060
  },
1061
  {
1062
  "epoch": 4.18,
1063
  "learning_rate": 3.2293377120963334e-05,
1064
- "loss": 0.9433,
1065
  "step": 1700
1066
  },
1067
  {
1068
  "epoch": 4.2,
1069
  "learning_rate": 3.215654077723043e-05,
1070
- "loss": 0.9851,
1071
  "step": 1710
1072
  },
1073
  {
1074
  "epoch": 4.23,
1075
  "learning_rate": 3.2019704433497536e-05,
1076
- "loss": 1.0559,
1077
  "step": 1720
1078
  },
1079
  {
1080
  "epoch": 4.25,
1081
  "learning_rate": 3.188286808976465e-05,
1082
- "loss": 0.9604,
1083
  "step": 1730
1084
  },
1085
  {
1086
  "epoch": 4.28,
1087
  "learning_rate": 3.1746031746031745e-05,
1088
- "loss": 0.9938,
1089
  "step": 1740
1090
  },
1091
  {
1092
  "epoch": 4.3,
1093
  "learning_rate": 3.160919540229885e-05,
1094
- "loss": 1.0587,
1095
  "step": 1750
1096
  },
1097
  {
1098
  "epoch": 4.33,
1099
  "learning_rate": 3.147235905856596e-05,
1100
- "loss": 0.9771,
1101
  "step": 1760
1102
  },
1103
  {
1104
  "epoch": 4.35,
1105
  "learning_rate": 3.133552271483306e-05,
1106
- "loss": 1.0232,
1107
  "step": 1770
1108
  },
1109
  {
1110
  "epoch": 4.38,
1111
  "learning_rate": 3.119868637110017e-05,
1112
- "loss": 1.014,
1113
  "step": 1780
1114
  },
1115
  {
1116
  "epoch": 4.4,
1117
  "learning_rate": 3.1061850027367273e-05,
1118
- "loss": 0.9797,
1119
  "step": 1790
1120
  },
1121
  {
1122
  "epoch": 4.43,
1123
  "learning_rate": 3.092501368363437e-05,
1124
- "loss": 0.9898,
1125
  "step": 1800
1126
  },
1127
  {
1128
  "epoch": 4.45,
1129
  "learning_rate": 3.078817733990148e-05,
1130
- "loss": 1.0044,
1131
  "step": 1810
1132
  },
1133
  {
1134
  "epoch": 4.47,
1135
  "learning_rate": 3.065134099616858e-05,
1136
- "loss": 1.0723,
1137
  "step": 1820
1138
  },
1139
  {
1140
  "epoch": 4.5,
1141
  "learning_rate": 3.0514504652435688e-05,
1142
- "loss": 1.0545,
1143
  "step": 1830
1144
  },
1145
  {
1146
  "epoch": 4.52,
1147
  "learning_rate": 3.0377668308702795e-05,
1148
- "loss": 0.9979,
1149
  "step": 1840
1150
  },
1151
  {
1152
  "epoch": 4.55,
1153
  "learning_rate": 3.0240831964969896e-05,
1154
- "loss": 0.9971,
1155
  "step": 1850
1156
  },
1157
  {
1158
  "epoch": 4.57,
1159
  "learning_rate": 3.0103995621237e-05,
1160
- "loss": 1.0341,
1161
  "step": 1860
1162
  },
1163
  {
1164
  "epoch": 4.6,
1165
  "learning_rate": 2.996715927750411e-05,
1166
- "loss": 0.9853,
1167
  "step": 1870
1168
  },
1169
  {
1170
  "epoch": 4.62,
1171
  "learning_rate": 2.983032293377121e-05,
1172
- "loss": 1.0434,
1173
  "step": 1880
1174
  },
1175
  {
1176
  "epoch": 4.65,
1177
  "learning_rate": 2.9693486590038317e-05,
1178
- "loss": 0.94,
1179
  "step": 1890
1180
  },
1181
  {
1182
  "epoch": 4.67,
1183
  "learning_rate": 2.9556650246305422e-05,
1184
- "loss": 1.0002,
1185
  "step": 1900
1186
  },
1187
  {
1188
  "epoch": 4.7,
1189
  "learning_rate": 2.9419813902572523e-05,
1190
- "loss": 0.9252,
1191
  "step": 1910
1192
  },
1193
  {
1194
  "epoch": 4.72,
1195
  "learning_rate": 2.928297755883963e-05,
1196
- "loss": 0.8899,
1197
  "step": 1920
1198
  },
1199
  {
1200
  "epoch": 4.74,
1201
  "learning_rate": 2.914614121510673e-05,
1202
- "loss": 1.0344,
1203
  "step": 1930
1204
  },
1205
  {
1206
  "epoch": 4.77,
1207
  "learning_rate": 2.900930487137384e-05,
1208
- "loss": 1.008,
1209
  "step": 1940
1210
  },
1211
  {
1212
  "epoch": 4.79,
1213
  "learning_rate": 2.8872468527640944e-05,
1214
- "loss": 0.9935,
1215
  "step": 1950
1216
  },
1217
  {
1218
  "epoch": 4.82,
1219
  "learning_rate": 2.8735632183908045e-05,
1220
- "loss": 1.0432,
1221
  "step": 1960
1222
  },
1223
  {
1224
  "epoch": 4.84,
1225
  "learning_rate": 2.8598795840175153e-05,
1226
- "loss": 0.9235,
1227
  "step": 1970
1228
  },
1229
  {
1230
  "epoch": 4.87,
1231
  "learning_rate": 2.8461959496442257e-05,
1232
- "loss": 0.9564,
1233
  "step": 1980
1234
  },
1235
  {
1236
  "epoch": 4.89,
1237
  "learning_rate": 2.8325123152709358e-05,
1238
- "loss": 0.9873,
1239
  "step": 1990
1240
  },
1241
  {
1242
  "epoch": 4.92,
1243
  "learning_rate": 2.8188286808976466e-05,
1244
- "loss": 1.0216,
1245
  "step": 2000
1246
  },
1247
  {
1248
  "epoch": 4.94,
1249
  "learning_rate": 2.8051450465243574e-05,
1250
- "loss": 0.9,
1251
  "step": 2010
1252
  },
1253
  {
1254
  "epoch": 4.97,
1255
  "learning_rate": 2.7914614121510675e-05,
1256
- "loss": 0.9814,
1257
  "step": 2020
1258
  },
1259
  {
1260
  "epoch": 4.99,
1261
  "learning_rate": 2.777777777777778e-05,
1262
- "loss": 0.9785,
1263
  "step": 2030
1264
  },
1265
  {
1266
  "epoch": 5.0,
1267
- "eval_accuracy": 0.9139152981849611,
1268
- "eval_loss": 0.4472927749156952,
1269
- "eval_runtime": 27.773,
1270
- "eval_samples_per_second": 208.296,
1271
- "eval_steps_per_second": 6.517,
1272
  "step": 2033
1273
  },
1274
  {
1275
  "epoch": 5.02,
1276
  "learning_rate": 2.764094143404488e-05,
1277
- "loss": 0.97,
1278
  "step": 2040
1279
  },
1280
  {
1281
  "epoch": 5.04,
1282
  "learning_rate": 2.7504105090311988e-05,
1283
- "loss": 0.9339,
1284
  "step": 2050
1285
  },
1286
  {
1287
  "epoch": 5.06,
1288
  "learning_rate": 2.7367268746579096e-05,
1289
- "loss": 0.8636,
1290
  "step": 2060
1291
  },
1292
  {
1293
  "epoch": 5.09,
1294
  "learning_rate": 2.7230432402846197e-05,
1295
- "loss": 0.8525,
1296
  "step": 2070
1297
  },
1298
  {
1299
  "epoch": 5.11,
1300
  "learning_rate": 2.70935960591133e-05,
1301
- "loss": 0.9281,
1302
  "step": 2080
1303
  },
1304
  {
1305
  "epoch": 5.14,
1306
  "learning_rate": 2.695675971538041e-05,
1307
- "loss": 0.9573,
1308
  "step": 2090
1309
  },
1310
  {
1311
  "epoch": 5.16,
1312
  "learning_rate": 2.681992337164751e-05,
1313
- "loss": 1.0129,
1314
  "step": 2100
1315
  },
1316
  {
1317
  "epoch": 5.19,
1318
  "learning_rate": 2.6683087027914618e-05,
1319
- "loss": 0.9568,
1320
  "step": 2110
1321
  },
1322
  {
1323
  "epoch": 5.21,
1324
  "learning_rate": 2.6546250684181722e-05,
1325
- "loss": 0.8739,
1326
  "step": 2120
1327
  },
1328
  {
1329
  "epoch": 5.24,
1330
  "learning_rate": 2.6409414340448823e-05,
1331
- "loss": 1.0142,
1332
  "step": 2130
1333
  },
1334
  {
1335
  "epoch": 5.26,
1336
  "learning_rate": 2.627257799671593e-05,
1337
- "loss": 0.8615,
1338
  "step": 2140
1339
  },
1340
  {
1341
  "epoch": 5.29,
1342
  "learning_rate": 2.6135741652983032e-05,
1343
- "loss": 0.9213,
1344
  "step": 2150
1345
  },
1346
  {
1347
  "epoch": 5.31,
1348
  "learning_rate": 2.5998905309250136e-05,
1349
- "loss": 0.9751,
1350
  "step": 2160
1351
  },
1352
  {
1353
  "epoch": 5.33,
1354
  "learning_rate": 2.5862068965517244e-05,
1355
- "loss": 0.9144,
1356
  "step": 2170
1357
  },
1358
  {
1359
  "epoch": 5.36,
1360
  "learning_rate": 2.5725232621784345e-05,
1361
- "loss": 0.8717,
1362
  "step": 2180
1363
  },
1364
  {
1365
  "epoch": 5.38,
1366
  "learning_rate": 2.5588396278051453e-05,
1367
- "loss": 1.0529,
1368
  "step": 2190
1369
  },
1370
  {
1371
  "epoch": 5.41,
1372
  "learning_rate": 2.5451559934318557e-05,
1373
- "loss": 0.9158,
1374
  "step": 2200
1375
  },
1376
  {
1377
  "epoch": 5.43,
1378
  "learning_rate": 2.531472359058566e-05,
1379
- "loss": 0.9584,
1380
  "step": 2210
1381
  },
1382
  {
1383
  "epoch": 5.46,
1384
  "learning_rate": 2.5177887246852766e-05,
1385
- "loss": 0.9806,
1386
  "step": 2220
1387
  },
1388
  {
1389
  "epoch": 5.48,
1390
  "learning_rate": 2.5041050903119874e-05,
1391
- "loss": 0.8941,
1392
  "step": 2230
1393
  },
1394
  {
1395
  "epoch": 5.51,
1396
  "learning_rate": 2.4904214559386975e-05,
1397
- "loss": 0.9625,
1398
  "step": 2240
1399
  },
1400
  {
1401
  "epoch": 5.53,
1402
  "learning_rate": 2.476737821565408e-05,
1403
- "loss": 0.9277,
1404
  "step": 2250
1405
  },
1406
  {
1407
  "epoch": 5.56,
1408
  "learning_rate": 2.4630541871921184e-05,
1409
- "loss": 0.9645,
1410
  "step": 2260
1411
  },
1412
  {
1413
  "epoch": 5.58,
1414
  "learning_rate": 2.4493705528188288e-05,
1415
- "loss": 0.9162,
1416
  "step": 2270
1417
  },
1418
  {
1419
  "epoch": 5.61,
1420
  "learning_rate": 2.4356869184455393e-05,
1421
- "loss": 0.8772,
1422
  "step": 2280
1423
  },
1424
  {
1425
  "epoch": 5.63,
1426
  "learning_rate": 2.4220032840722497e-05,
1427
- "loss": 0.9556,
1428
  "step": 2290
1429
  },
1430
  {
1431
  "epoch": 5.65,
1432
  "learning_rate": 2.40831964969896e-05,
1433
- "loss": 0.885,
1434
  "step": 2300
1435
  },
1436
  {
1437
  "epoch": 5.68,
1438
  "learning_rate": 2.3946360153256706e-05,
1439
- "loss": 0.7871,
1440
  "step": 2310
1441
  },
1442
  {
1443
  "epoch": 5.7,
1444
  "learning_rate": 2.380952380952381e-05,
1445
- "loss": 0.8639,
1446
  "step": 2320
1447
  },
1448
  {
1449
  "epoch": 5.73,
1450
  "learning_rate": 2.3672687465790915e-05,
1451
- "loss": 0.9446,
1452
  "step": 2330
1453
  },
1454
  {
1455
  "epoch": 5.75,
1456
  "learning_rate": 2.353585112205802e-05,
1457
- "loss": 0.8905,
1458
  "step": 2340
1459
  },
1460
  {
1461
  "epoch": 5.78,
1462
  "learning_rate": 2.3399014778325123e-05,
1463
- "loss": 0.9502,
1464
  "step": 2350
1465
  },
1466
  {
1467
  "epoch": 5.8,
1468
  "learning_rate": 2.326217843459223e-05,
1469
- "loss": 0.9842,
1470
  "step": 2360
1471
  },
1472
  {
1473
  "epoch": 5.83,
1474
  "learning_rate": 2.3125342090859332e-05,
1475
- "loss": 0.8827,
1476
  "step": 2370
1477
  },
1478
  {
1479
  "epoch": 5.85,
1480
  "learning_rate": 2.2988505747126437e-05,
1481
- "loss": 0.8861,
1482
  "step": 2380
1483
  },
1484
  {
1485
  "epoch": 5.88,
1486
  "learning_rate": 2.285166940339354e-05,
1487
- "loss": 0.9701,
1488
  "step": 2390
1489
  },
1490
  {
1491
  "epoch": 5.9,
1492
  "learning_rate": 2.271483305966065e-05,
1493
- "loss": 0.8456,
1494
  "step": 2400
1495
  },
1496
  {
1497
  "epoch": 5.93,
1498
  "learning_rate": 2.2577996715927753e-05,
1499
- "loss": 0.8858,
1500
  "step": 2410
1501
  },
1502
  {
1503
  "epoch": 5.95,
1504
  "learning_rate": 2.2441160372194854e-05,
1505
- "loss": 0.8431,
1506
  "step": 2420
1507
  },
1508
  {
1509
  "epoch": 5.97,
1510
  "learning_rate": 2.230432402846196e-05,
1511
- "loss": 0.9466,
1512
  "step": 2430
1513
  },
1514
  {
1515
  "epoch": 6.0,
1516
  "learning_rate": 2.2167487684729066e-05,
1517
- "loss": 0.9625,
1518
  "step": 2440
1519
  },
1520
  {
1521
  "epoch": 6.0,
1522
- "eval_accuracy": 0.9199654278305963,
1523
- "eval_loss": 0.3929709494113922,
1524
- "eval_runtime": 27.3929,
1525
- "eval_samples_per_second": 211.186,
1526
- "eval_steps_per_second": 6.608,
1527
  "step": 2440
1528
  },
1529
  {
1530
  "epoch": 6.02,
1531
  "learning_rate": 2.203065134099617e-05,
1532
- "loss": 0.886,
1533
  "step": 2450
1534
  },
1535
  {
1536
  "epoch": 6.05,
1537
  "learning_rate": 2.1893814997263272e-05,
1538
- "loss": 0.8862,
1539
  "step": 2460
1540
  },
1541
  {
1542
  "epoch": 6.07,
1543
  "learning_rate": 2.175697865353038e-05,
1544
- "loss": 0.8938,
1545
  "step": 2470
1546
  },
1547
  {
1548
  "epoch": 6.1,
1549
  "learning_rate": 2.1620142309797484e-05,
1550
- "loss": 0.8165,
1551
  "step": 2480
1552
  },
1553
  {
1554
  "epoch": 6.12,
1555
  "learning_rate": 2.148330596606459e-05,
1556
- "loss": 0.8117,
1557
  "step": 2490
1558
  },
1559
  {
1560
  "epoch": 6.15,
1561
  "learning_rate": 2.1346469622331693e-05,
1562
- "loss": 0.8538,
1563
  "step": 2500
1564
  },
1565
  {
1566
  "epoch": 6.17,
1567
  "learning_rate": 2.1209633278598797e-05,
1568
- "loss": 0.8646,
1569
  "step": 2510
1570
  },
1571
  {
1572
  "epoch": 6.2,
1573
  "learning_rate": 2.10727969348659e-05,
1574
- "loss": 0.8506,
1575
  "step": 2520
1576
  },
1577
  {
1578
  "epoch": 6.22,
1579
  "learning_rate": 2.0935960591133006e-05,
1580
- "loss": 0.9332,
1581
  "step": 2530
1582
  },
1583
  {
1584
  "epoch": 6.24,
1585
  "learning_rate": 2.079912424740011e-05,
1586
- "loss": 0.7804,
1587
  "step": 2540
1588
  },
1589
  {
1590
  "epoch": 6.27,
1591
  "learning_rate": 2.0662287903667215e-05,
1592
- "loss": 0.8503,
1593
  "step": 2550
1594
  },
1595
  {
1596
  "epoch": 6.29,
1597
  "learning_rate": 2.052545155993432e-05,
1598
- "loss": 0.8818,
1599
  "step": 2560
1600
  },
1601
  {
1602
  "epoch": 6.32,
1603
  "learning_rate": 2.0388615216201424e-05,
1604
- "loss": 0.7721,
1605
  "step": 2570
1606
  },
1607
  {
1608
  "epoch": 6.34,
1609
  "learning_rate": 2.025177887246853e-05,
1610
- "loss": 0.8895,
1611
  "step": 2580
1612
  },
1613
  {
1614
  "epoch": 6.37,
1615
  "learning_rate": 2.0114942528735632e-05,
1616
- "loss": 0.8575,
1617
  "step": 2590
1618
  },
1619
  {
1620
  "epoch": 6.39,
1621
  "learning_rate": 1.9978106185002737e-05,
1622
- "loss": 0.8596,
1623
  "step": 2600
1624
  },
1625
  {
1626
  "epoch": 6.42,
1627
  "learning_rate": 1.984126984126984e-05,
1628
- "loss": 0.8394,
1629
  "step": 2610
1630
  },
1631
  {
1632
  "epoch": 6.44,
1633
  "learning_rate": 1.970443349753695e-05,
1634
- "loss": 0.8782,
1635
  "step": 2620
1636
  },
1637
  {
1638
  "epoch": 6.47,
1639
  "learning_rate": 1.956759715380405e-05,
1640
- "loss": 0.7369,
1641
  "step": 2630
1642
  },
1643
  {
1644
  "epoch": 6.49,
1645
  "learning_rate": 1.9430760810071154e-05,
1646
- "loss": 0.8684,
1647
  "step": 2640
1648
  },
1649
  {
1650
  "epoch": 6.52,
1651
  "learning_rate": 1.929392446633826e-05,
1652
- "loss": 0.8494,
1653
  "step": 2650
1654
  },
1655
  {
1656
  "epoch": 6.54,
1657
  "learning_rate": 1.9157088122605367e-05,
1658
- "loss": 0.764,
1659
  "step": 2660
1660
  },
1661
  {
1662
  "epoch": 6.56,
1663
  "learning_rate": 1.9020251778872468e-05,
1664
- "loss": 0.855,
1665
  "step": 2670
1666
  },
1667
  {
1668
  "epoch": 6.59,
1669
  "learning_rate": 1.8883415435139572e-05,
1670
- "loss": 0.8766,
1671
  "step": 2680
1672
  },
1673
  {
1674
  "epoch": 6.61,
1675
  "learning_rate": 1.874657909140668e-05,
1676
- "loss": 0.8737,
1677
  "step": 2690
1678
  },
1679
  {
1680
  "epoch": 6.64,
1681
  "learning_rate": 1.8609742747673784e-05,
1682
- "loss": 0.8441,
1683
  "step": 2700
1684
  },
1685
  {
1686
  "epoch": 6.66,
1687
  "learning_rate": 1.847290640394089e-05,
1688
- "loss": 0.8257,
1689
  "step": 2710
1690
  },
1691
  {
1692
  "epoch": 6.69,
1693
  "learning_rate": 1.833607006020799e-05,
1694
- "loss": 0.8367,
1695
  "step": 2720
1696
  },
1697
  {
1698
  "epoch": 6.71,
1699
  "learning_rate": 1.8199233716475097e-05,
1700
- "loss": 0.8218,
1701
  "step": 2730
1702
  },
1703
  {
1704
  "epoch": 6.74,
1705
  "learning_rate": 1.8062397372742202e-05,
1706
- "loss": 0.7645,
1707
  "step": 2740
1708
  },
1709
  {
1710
  "epoch": 6.76,
1711
  "learning_rate": 1.7925561029009306e-05,
1712
- "loss": 0.8305,
1713
  "step": 2750
1714
  },
1715
  {
1716
  "epoch": 6.79,
1717
  "learning_rate": 1.7788724685276407e-05,
1718
- "loss": 0.8306,
1719
  "step": 2760
1720
  },
1721
  {
1722
  "epoch": 6.81,
1723
  "learning_rate": 1.7651888341543515e-05,
1724
- "loss": 0.9052,
1725
  "step": 2770
1726
  },
1727
  {
1728
  "epoch": 6.83,
1729
  "learning_rate": 1.751505199781062e-05,
1730
- "loss": 0.7477,
1731
  "step": 2780
1732
  },
1733
  {
1734
  "epoch": 6.86,
1735
  "learning_rate": 1.7378215654077724e-05,
1736
- "loss": 0.8458,
1737
  "step": 2790
1738
  },
1739
  {
1740
  "epoch": 6.88,
1741
  "learning_rate": 1.7241379310344828e-05,
1742
- "loss": 0.901,
1743
  "step": 2800
1744
  },
1745
  {
1746
  "epoch": 6.91,
1747
  "learning_rate": 1.7104542966611933e-05,
1748
- "loss": 0.8307,
1749
  "step": 2810
1750
  },
1751
  {
1752
  "epoch": 6.93,
1753
  "learning_rate": 1.6967706622879037e-05,
1754
- "loss": 0.8201,
1755
  "step": 2820
1756
  },
1757
  {
1758
  "epoch": 6.96,
1759
  "learning_rate": 1.683087027914614e-05,
1760
- "loss": 0.8514,
1761
  "step": 2830
1762
  },
1763
  {
1764
  "epoch": 6.98,
1765
  "learning_rate": 1.6694033935413246e-05,
1766
- "loss": 0.8414,
1767
  "step": 2840
1768
  },
1769
  {
1770
  "epoch": 7.0,
1771
- "eval_accuracy": 0.9327571305099395,
1772
- "eval_loss": 0.3537824749946594,
1773
- "eval_runtime": 27.9648,
1774
- "eval_samples_per_second": 206.867,
1775
- "eval_steps_per_second": 6.472,
1776
  "step": 2847
1777
  },
1778
  {
1779
  "epoch": 7.01,
1780
  "learning_rate": 1.655719759168035e-05,
1781
- "loss": 0.9236,
1782
  "step": 2850
1783
  },
1784
  {
1785
  "epoch": 7.03,
1786
  "learning_rate": 1.6420361247947455e-05,
1787
- "loss": 0.8118,
1788
  "step": 2860
1789
  },
1790
  {
1791
  "epoch": 7.06,
1792
  "learning_rate": 1.628352490421456e-05,
1793
- "loss": 0.8715,
1794
  "step": 2870
1795
  },
1796
  {
1797
  "epoch": 7.08,
1798
  "learning_rate": 1.6146688560481667e-05,
1799
- "loss": 0.8181,
1800
  "step": 2880
1801
  },
1802
  {
1803
  "epoch": 7.11,
1804
  "learning_rate": 1.6009852216748768e-05,
1805
- "loss": 0.8577,
1806
  "step": 2890
1807
  },
1808
  {
1809
  "epoch": 7.13,
1810
  "learning_rate": 1.5873015873015872e-05,
1811
- "loss": 0.7951,
1812
  "step": 2900
1813
  },
1814
  {
1815
  "epoch": 7.15,
1816
  "learning_rate": 1.573617952928298e-05,
1817
- "loss": 0.8414,
1818
  "step": 2910
1819
  },
1820
  {
1821
  "epoch": 7.18,
1822
  "learning_rate": 1.5599343185550085e-05,
1823
- "loss": 0.8405,
1824
  "step": 2920
1825
  },
1826
  {
1827
  "epoch": 7.2,
1828
  "learning_rate": 1.5462506841817186e-05,
1829
- "loss": 0.8375,
1830
  "step": 2930
1831
  },
1832
  {
1833
  "epoch": 7.23,
1834
  "learning_rate": 1.532567049808429e-05,
1835
- "loss": 0.8256,
1836
  "step": 2940
1837
  },
1838
  {
1839
  "epoch": 7.25,
1840
  "learning_rate": 1.5188834154351398e-05,
1841
- "loss": 0.9106,
1842
  "step": 2950
1843
  },
1844
  {
1845
  "epoch": 7.28,
1846
  "learning_rate": 1.50519978106185e-05,
1847
- "loss": 0.7944,
1848
  "step": 2960
1849
  },
1850
  {
1851
  "epoch": 7.3,
1852
  "learning_rate": 1.4915161466885605e-05,
1853
- "loss": 0.8647,
1854
  "step": 2970
1855
  },
1856
  {
1857
  "epoch": 7.33,
1858
  "learning_rate": 1.4778325123152711e-05,
1859
- "loss": 0.7921,
1860
  "step": 2980
1861
  },
1862
  {
1863
  "epoch": 7.35,
1864
  "learning_rate": 1.4641488779419815e-05,
1865
- "loss": 0.7978,
1866
  "step": 2990
1867
  },
1868
  {
1869
  "epoch": 7.38,
1870
  "learning_rate": 1.450465243568692e-05,
1871
- "loss": 0.8136,
1872
  "step": 3000
1873
  },
1874
  {
1875
  "epoch": 7.4,
1876
  "learning_rate": 1.4367816091954022e-05,
1877
- "loss": 0.8278,
1878
  "step": 3010
1879
  },
1880
  {
1881
  "epoch": 7.42,
1882
  "learning_rate": 1.4230979748221129e-05,
1883
- "loss": 0.8061,
1884
  "step": 3020
1885
  },
1886
  {
1887
  "epoch": 7.45,
1888
  "learning_rate": 1.4094143404488233e-05,
1889
- "loss": 0.7277,
1890
  "step": 3030
1891
  },
1892
  {
1893
  "epoch": 7.47,
1894
  "learning_rate": 1.3957307060755337e-05,
1895
- "loss": 0.809,
1896
  "step": 3040
1897
  },
1898
  {
1899
  "epoch": 7.5,
1900
  "learning_rate": 1.382047071702244e-05,
1901
- "loss": 0.8359,
1902
  "step": 3050
1903
  },
1904
  {
1905
  "epoch": 7.52,
1906
  "learning_rate": 1.3683634373289548e-05,
1907
- "loss": 0.8107,
1908
  "step": 3060
1909
  },
1910
  {
1911
  "epoch": 7.55,
1912
  "learning_rate": 1.354679802955665e-05,
1913
- "loss": 0.8016,
1914
  "step": 3070
1915
  },
1916
  {
1917
  "epoch": 7.57,
1918
  "learning_rate": 1.3409961685823755e-05,
1919
- "loss": 0.8512,
1920
  "step": 3080
1921
  },
1922
  {
1923
  "epoch": 7.6,
1924
  "learning_rate": 1.3273125342090861e-05,
1925
- "loss": 0.7948,
1926
  "step": 3090
1927
  },
1928
  {
1929
  "epoch": 7.62,
1930
  "learning_rate": 1.3136288998357965e-05,
1931
- "loss": 0.8081,
1932
  "step": 3100
1933
  },
1934
  {
1935
  "epoch": 7.65,
1936
  "learning_rate": 1.2999452654625068e-05,
1937
- "loss": 0.854,
1938
  "step": 3110
1939
  },
1940
  {
1941
  "epoch": 7.67,
1942
  "learning_rate": 1.2862616310892173e-05,
1943
- "loss": 0.8504,
1944
  "step": 3120
1945
  },
1946
  {
1947
  "epoch": 7.7,
1948
  "learning_rate": 1.2725779967159279e-05,
1949
- "loss": 0.824,
1950
  "step": 3130
1951
  },
1952
  {
1953
  "epoch": 7.72,
1954
  "learning_rate": 1.2588943623426383e-05,
1955
- "loss": 0.8119,
1956
  "step": 3140
1957
  },
1958
  {
1959
  "epoch": 7.74,
1960
  "learning_rate": 1.2452107279693487e-05,
1961
- "loss": 0.8304,
1962
  "step": 3150
1963
  },
1964
  {
1965
  "epoch": 7.77,
1966
  "learning_rate": 1.2315270935960592e-05,
1967
- "loss": 0.7737,
1968
  "step": 3160
1969
  },
1970
  {
1971
  "epoch": 7.79,
1972
  "learning_rate": 1.2178434592227696e-05,
1973
- "loss": 0.8032,
1974
  "step": 3170
1975
  },
1976
  {
1977
  "epoch": 7.82,
1978
  "learning_rate": 1.20415982484948e-05,
1979
- "loss": 0.8448,
1980
  "step": 3180
1981
  },
1982
  {
1983
  "epoch": 7.84,
1984
  "learning_rate": 1.1904761904761905e-05,
1985
- "loss": 0.755,
1986
  "step": 3190
1987
  },
1988
  {
1989
  "epoch": 7.87,
1990
  "learning_rate": 1.176792556102901e-05,
1991
- "loss": 0.812,
1992
  "step": 3200
1993
  },
1994
  {
1995
  "epoch": 7.89,
1996
  "learning_rate": 1.1631089217296116e-05,
1997
- "loss": 0.8194,
1998
  "step": 3210
1999
  },
2000
  {
2001
  "epoch": 7.92,
2002
  "learning_rate": 1.1494252873563218e-05,
2003
- "loss": 0.8348,
2004
  "step": 3220
2005
  },
2006
  {
2007
  "epoch": 7.94,
2008
  "learning_rate": 1.1357416529830324e-05,
2009
- "loss": 0.7492,
2010
  "step": 3230
2011
  },
2012
  {
2013
  "epoch": 7.97,
2014
  "learning_rate": 1.1220580186097427e-05,
2015
- "loss": 0.8227,
2016
  "step": 3240
2017
  },
2018
  {
2019
  "epoch": 7.99,
2020
  "learning_rate": 1.1083743842364533e-05,
2021
- "loss": 0.8336,
2022
  "step": 3250
2023
  },
2024
  {
2025
  "epoch": 8.0,
2026
- "eval_accuracy": 0.9334485738980121,
2027
- "eval_loss": 0.3332645893096924,
2028
- "eval_runtime": 29.4073,
2029
- "eval_samples_per_second": 196.72,
2030
- "eval_steps_per_second": 6.155,
2031
  "step": 3254
2032
  },
2033
  {
2034
  "epoch": 8.01,
2035
  "learning_rate": 1.0946907498631636e-05,
2036
- "loss": 0.7665,
2037
  "step": 3260
2038
  },
2039
  {
2040
  "epoch": 8.04,
2041
  "learning_rate": 1.0810071154898742e-05,
2042
- "loss": 0.807,
2043
  "step": 3270
2044
  },
2045
  {
2046
  "epoch": 8.06,
2047
  "learning_rate": 1.0673234811165846e-05,
2048
- "loss": 0.8521,
2049
  "step": 3280
2050
  },
2051
  {
2052
  "epoch": 8.09,
2053
  "learning_rate": 1.053639846743295e-05,
2054
- "loss": 0.7569,
2055
  "step": 3290
2056
  },
2057
  {
2058
  "epoch": 8.11,
2059
  "learning_rate": 1.0399562123700055e-05,
2060
- "loss": 0.7465,
2061
  "step": 3300
2062
  },
2063
  {
2064
  "epoch": 8.14,
2065
  "learning_rate": 1.026272577996716e-05,
2066
- "loss": 0.8432,
2067
  "step": 3310
2068
  },
2069
  {
2070
  "epoch": 8.16,
2071
  "learning_rate": 1.0125889436234266e-05,
2072
- "loss": 0.7659,
2073
  "step": 3320
2074
  },
2075
  {
2076
  "epoch": 8.19,
2077
  "learning_rate": 9.989053092501368e-06,
2078
- "loss": 0.7682,
2079
  "step": 3330
2080
  },
2081
  {
2082
  "epoch": 8.21,
2083
  "learning_rate": 9.852216748768475e-06,
2084
- "loss": 0.7285,
2085
  "step": 3340
2086
  },
2087
  {
2088
  "epoch": 8.24,
2089
  "learning_rate": 9.715380405035577e-06,
2090
- "loss": 0.8721,
2091
  "step": 3350
2092
  },
2093
  {
2094
  "epoch": 8.26,
2095
  "learning_rate": 9.578544061302683e-06,
2096
- "loss": 0.761,
2097
  "step": 3360
2098
  },
2099
  {
2100
  "epoch": 8.29,
2101
  "learning_rate": 9.441707717569786e-06,
2102
- "loss": 0.7812,
2103
  "step": 3370
2104
  },
2105
  {
2106
  "epoch": 8.31,
2107
  "learning_rate": 9.304871373836892e-06,
2108
- "loss": 0.8125,
2109
  "step": 3380
2110
  },
2111
  {
2112
  "epoch": 8.33,
2113
  "learning_rate": 9.168035030103995e-06,
2114
- "loss": 0.8344,
2115
  "step": 3390
2116
  },
2117
  {
2118
  "epoch": 8.36,
2119
  "learning_rate": 9.031198686371101e-06,
2120
- "loss": 0.7884,
2121
  "step": 3400
2122
  },
2123
  {
2124
  "epoch": 8.38,
2125
  "learning_rate": 8.894362342638204e-06,
2126
- "loss": 0.7692,
2127
  "step": 3410
2128
  },
2129
  {
2130
  "epoch": 8.41,
2131
  "learning_rate": 8.75752599890531e-06,
2132
- "loss": 0.875,
2133
  "step": 3420
2134
  },
2135
  {
2136
  "epoch": 8.43,
2137
  "learning_rate": 8.620689655172414e-06,
2138
- "loss": 0.7938,
2139
  "step": 3430
2140
  },
2141
  {
2142
  "epoch": 8.46,
2143
  "learning_rate": 8.483853311439519e-06,
2144
- "loss": 0.7749,
2145
  "step": 3440
2146
  },
2147
  {
2148
  "epoch": 8.48,
2149
  "learning_rate": 8.347016967706623e-06,
2150
- "loss": 0.7628,
2151
  "step": 3450
2152
  },
2153
  {
2154
  "epoch": 8.51,
2155
  "learning_rate": 8.210180623973727e-06,
2156
- "loss": 0.8007,
2157
  "step": 3460
2158
  },
2159
  {
2160
  "epoch": 8.53,
2161
  "learning_rate": 8.073344280240833e-06,
2162
- "loss": 0.7281,
2163
  "step": 3470
2164
  },
2165
  {
2166
  "epoch": 8.56,
2167
  "learning_rate": 7.936507936507936e-06,
2168
- "loss": 0.7633,
2169
  "step": 3480
2170
  },
2171
  {
2172
  "epoch": 8.58,
2173
  "learning_rate": 7.799671592775042e-06,
2174
- "loss": 0.8202,
2175
  "step": 3490
2176
  },
2177
  {
2178
  "epoch": 8.6,
2179
  "learning_rate": 7.662835249042145e-06,
2180
- "loss": 0.8123,
2181
  "step": 3500
2182
  },
2183
  {
2184
  "epoch": 8.63,
2185
  "learning_rate": 7.52599890530925e-06,
2186
- "loss": 0.8223,
2187
  "step": 3510
2188
  },
2189
  {
2190
  "epoch": 8.65,
2191
  "learning_rate": 7.3891625615763555e-06,
2192
- "loss": 0.8456,
2193
  "step": 3520
2194
  },
2195
  {
2196
  "epoch": 8.68,
2197
  "learning_rate": 7.25232621784346e-06,
2198
- "loss": 0.752,
2199
  "step": 3530
2200
  },
2201
  {
2202
  "epoch": 8.7,
2203
  "learning_rate": 7.115489874110564e-06,
2204
- "loss": 0.7942,
2205
  "step": 3540
2206
  },
2207
  {
2208
  "epoch": 8.73,
2209
  "learning_rate": 6.978653530377669e-06,
2210
- "loss": 0.7635,
2211
  "step": 3550
2212
  },
2213
  {
2214
  "epoch": 8.75,
2215
  "learning_rate": 6.841817186644774e-06,
2216
- "loss": 0.7014,
2217
  "step": 3560
2218
  },
2219
  {
2220
  "epoch": 8.78,
2221
  "learning_rate": 6.7049808429118775e-06,
2222
- "loss": 0.7572,
2223
  "step": 3570
2224
  },
2225
  {
2226
  "epoch": 8.8,
2227
  "learning_rate": 6.568144499178983e-06,
2228
- "loss": 0.7816,
2229
  "step": 3580
2230
  },
2231
  {
2232
  "epoch": 8.83,
2233
  "learning_rate": 6.431308155446086e-06,
2234
- "loss": 0.7336,
2235
  "step": 3590
2236
  },
2237
  {
2238
  "epoch": 8.85,
2239
  "learning_rate": 6.2944718117131915e-06,
2240
- "loss": 0.7665,
2241
  "step": 3600
2242
  },
2243
  {
2244
  "epoch": 8.88,
2245
  "learning_rate": 6.157635467980296e-06,
2246
- "loss": 0.7669,
2247
  "step": 3610
2248
  },
2249
  {
2250
  "epoch": 8.9,
2251
  "learning_rate": 6.0207991242474e-06,
2252
- "loss": 0.7587,
2253
  "step": 3620
2254
  },
2255
  {
2256
  "epoch": 8.92,
2257
  "learning_rate": 5.883962780514505e-06,
2258
- "loss": 0.8309,
2259
  "step": 3630
2260
  },
2261
  {
2262
  "epoch": 8.95,
2263
  "learning_rate": 5.747126436781609e-06,
2264
- "loss": 0.7201,
2265
  "step": 3640
2266
  },
2267
  {
2268
  "epoch": 8.97,
2269
  "learning_rate": 5.6102900930487136e-06,
2270
- "loss": 0.736,
2271
  "step": 3650
2272
  },
2273
  {
2274
  "epoch": 9.0,
2275
  "learning_rate": 5.473453749315818e-06,
2276
- "loss": 0.758,
2277
  "step": 3660
2278
  },
2279
  {
2280
  "epoch": 9.0,
2281
- "eval_accuracy": 0.9405358686257562,
2282
- "eval_loss": 0.3180586099624634,
2283
- "eval_runtime": 28.3456,
2284
- "eval_samples_per_second": 204.088,
2285
- "eval_steps_per_second": 6.385,
2286
  "step": 3660
2287
  },
2288
  {
2289
  "epoch": 9.02,
2290
  "learning_rate": 5.336617405582923e-06,
2291
- "loss": 0.6856,
2292
  "step": 3670
2293
  },
2294
  {
2295
  "epoch": 9.05,
2296
  "learning_rate": 5.199781061850028e-06,
2297
- "loss": 0.8134,
2298
  "step": 3680
2299
  },
2300
  {
2301
  "epoch": 9.07,
2302
  "learning_rate": 5.062944718117133e-06,
2303
- "loss": 0.8841,
2304
  "step": 3690
2305
  },
2306
  {
2307
  "epoch": 9.1,
2308
  "learning_rate": 4.926108374384237e-06,
2309
- "loss": 0.7623,
2310
  "step": 3700
2311
  },
2312
  {
2313
  "epoch": 9.12,
2314
  "learning_rate": 4.789272030651342e-06,
2315
- "loss": 0.7488,
2316
  "step": 3710
2317
  },
2318
  {
2319
  "epoch": 9.15,
2320
  "learning_rate": 4.652435686918446e-06,
2321
- "loss": 0.7433,
2322
  "step": 3720
2323
  },
2324
  {
2325
  "epoch": 9.17,
2326
  "learning_rate": 4.5155993431855505e-06,
2327
- "loss": 0.7512,
2328
  "step": 3730
2329
  },
2330
  {
2331
  "epoch": 9.19,
2332
  "learning_rate": 4.378762999452655e-06,
2333
- "loss": 0.7725,
2334
  "step": 3740
2335
  },
2336
  {
2337
  "epoch": 9.22,
2338
  "learning_rate": 4.241926655719759e-06,
2339
- "loss": 0.7567,
2340
  "step": 3750
2341
  },
2342
  {
2343
  "epoch": 9.24,
2344
  "learning_rate": 4.105090311986864e-06,
2345
- "loss": 0.7213,
2346
  "step": 3760
2347
  },
2348
  {
2349
  "epoch": 9.27,
2350
  "learning_rate": 3.968253968253968e-06,
2351
- "loss": 0.75,
2352
  "step": 3770
2353
  },
2354
  {
2355
  "epoch": 9.29,
2356
  "learning_rate": 3.8314176245210725e-06,
2357
- "loss": 0.7999,
2358
  "step": 3780
2359
  },
2360
  {
2361
  "epoch": 9.32,
2362
  "learning_rate": 3.6945812807881777e-06,
2363
- "loss": 0.7489,
2364
  "step": 3790
2365
  },
2366
  {
2367
  "epoch": 9.34,
2368
  "learning_rate": 3.557744937055282e-06,
2369
- "loss": 0.7591,
2370
  "step": 3800
2371
  },
2372
  {
2373
  "epoch": 9.37,
2374
  "learning_rate": 3.420908593322387e-06,
2375
- "loss": 0.769,
2376
  "step": 3810
2377
  },
2378
  {
2379
  "epoch": 9.39,
2380
  "learning_rate": 3.2840722495894914e-06,
2381
- "loss": 0.7171,
2382
  "step": 3820
2383
  },
2384
  {
2385
  "epoch": 9.42,
2386
  "learning_rate": 3.1472359058565958e-06,
2387
- "loss": 0.7158,
2388
  "step": 3830
2389
  },
2390
  {
2391
  "epoch": 9.44,
2392
  "learning_rate": 3.0103995621237e-06,
2393
- "loss": 0.7132,
2394
  "step": 3840
2395
  },
2396
  {
2397
  "epoch": 9.47,
2398
  "learning_rate": 2.8735632183908046e-06,
2399
- "loss": 0.7236,
2400
  "step": 3850
2401
  },
2402
  {
2403
  "epoch": 9.49,
2404
  "learning_rate": 2.736726874657909e-06,
2405
- "loss": 0.7361,
2406
  "step": 3860
2407
  },
2408
  {
2409
  "epoch": 9.51,
2410
  "learning_rate": 2.599890530925014e-06,
2411
- "loss": 0.6774,
2412
  "step": 3870
2413
  },
2414
  {
2415
  "epoch": 9.54,
2416
  "learning_rate": 2.4630541871921186e-06,
2417
- "loss": 0.8082,
2418
  "step": 3880
2419
  },
2420
  {
2421
  "epoch": 9.56,
2422
  "learning_rate": 2.326217843459223e-06,
2423
- "loss": 0.776,
2424
  "step": 3890
2425
  },
2426
  {
2427
  "epoch": 9.59,
2428
  "learning_rate": 2.1893814997263274e-06,
2429
- "loss": 0.7655,
2430
  "step": 3900
2431
  },
2432
  {
2433
  "epoch": 9.61,
2434
  "learning_rate": 2.052545155993432e-06,
2435
- "loss": 0.7834,
2436
  "step": 3910
2437
  },
2438
  {
2439
  "epoch": 9.64,
2440
  "learning_rate": 1.9157088122605362e-06,
2441
- "loss": 0.7236,
2442
  "step": 3920
2443
  },
2444
  {
2445
  "epoch": 9.66,
2446
  "learning_rate": 1.778872468527641e-06,
2447
- "loss": 0.6974,
2448
  "step": 3930
2449
  },
2450
  {
2451
  "epoch": 9.69,
2452
  "learning_rate": 1.6420361247947457e-06,
2453
- "loss": 0.73,
2454
  "step": 3940
2455
  },
2456
  {
2457
  "epoch": 9.71,
2458
  "learning_rate": 1.50519978106185e-06,
2459
- "loss": 0.6768,
2460
  "step": 3950
2461
  },
2462
  {
2463
  "epoch": 9.74,
2464
  "learning_rate": 1.3683634373289545e-06,
2465
- "loss": 0.7841,
2466
  "step": 3960
2467
  },
2468
  {
2469
  "epoch": 9.76,
2470
  "learning_rate": 1.2315270935960593e-06,
2471
- "loss": 0.759,
2472
  "step": 3970
2473
  },
2474
  {
2475
  "epoch": 9.78,
2476
  "learning_rate": 1.0946907498631637e-06,
2477
- "loss": 0.7244,
2478
  "step": 3980
2479
  },
2480
  {
2481
  "epoch": 9.81,
2482
  "learning_rate": 9.578544061302681e-07,
2483
- "loss": 0.7326,
2484
  "step": 3990
2485
  },
2486
  {
2487
  "epoch": 9.83,
2488
  "learning_rate": 8.210180623973728e-07,
2489
- "loss": 0.7484,
2490
  "step": 4000
2491
  },
2492
  {
2493
  "epoch": 9.86,
2494
  "learning_rate": 6.841817186644772e-07,
2495
- "loss": 0.7589,
2496
  "step": 4010
2497
  },
2498
  {
2499
  "epoch": 9.88,
2500
  "learning_rate": 5.473453749315819e-07,
2501
- "loss": 0.7169,
2502
  "step": 4020
2503
  },
2504
  {
2505
  "epoch": 9.91,
2506
  "learning_rate": 4.105090311986864e-07,
2507
- "loss": 0.6762,
2508
  "step": 4030
2509
  },
2510
  {
2511
  "epoch": 9.93,
2512
  "learning_rate": 2.7367268746579093e-07,
2513
- "loss": 0.7896,
2514
  "step": 4040
2515
  },
2516
  {
2517
  "epoch": 9.96,
2518
  "learning_rate": 1.3683634373289546e-07,
2519
- "loss": 0.8115,
2520
  "step": 4050
2521
  },
2522
  {
2523
  "epoch": 9.98,
2524
  "learning_rate": 0.0,
2525
- "loss": 0.7343,
2526
  "step": 4060
2527
  },
2528
  {
2529
  "epoch": 9.98,
2530
- "eval_accuracy": 0.9403630077787382,
2531
- "eval_loss": 0.31005963683128357,
2532
- "eval_runtime": 28.1514,
2533
- "eval_samples_per_second": 205.496,
2534
- "eval_steps_per_second": 6.43,
2535
  "step": 4060
2536
  },
2537
  {
2538
  "epoch": 9.98,
2539
  "step": 4060,
2540
  "total_flos": 1.3140619208067262e+19,
2541
- "train_loss": 1.0548507860728673,
2542
- "train_runtime": 4291.8856,
2543
- "train_samples_per_second": 121.301,
2544
- "train_steps_per_second": 0.946
2545
  }
2546
  ],
2547
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.9676750216076059,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-4060",
4
  "epoch": 9.981561155500922,
5
  "eval_steps": 500,
6
  "global_step": 4060,
 
11
  {
12
  "epoch": 0.02,
13
  "learning_rate": 1.2315270935960593e-06,
14
+ "loss": 0.8581,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.05,
19
  "learning_rate": 2.4630541871921186e-06,
20
+ "loss": 0.7471,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.07,
25
  "learning_rate": 3.6945812807881777e-06,
26
+ "loss": 0.7525,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.1,
31
  "learning_rate": 4.926108374384237e-06,
32
+ "loss": 0.7179,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.12,
37
  "learning_rate": 6.157635467980296e-06,
38
+ "loss": 0.769,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.15,
43
  "learning_rate": 7.3891625615763555e-06,
44
+ "loss": 0.6952,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.17,
49
  "learning_rate": 8.620689655172414e-06,
50
+ "loss": 0.8408,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.2,
55
  "learning_rate": 9.852216748768475e-06,
56
+ "loss": 0.7396,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.22,
61
  "learning_rate": 1.1083743842364533e-05,
62
+ "loss": 0.8311,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.25,
67
  "learning_rate": 1.2315270935960592e-05,
68
+ "loss": 0.7802,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.27,
73
  "learning_rate": 1.354679802955665e-05,
74
+ "loss": 0.7188,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.3,
79
  "learning_rate": 1.4778325123152711e-05,
80
+ "loss": 0.7815,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.32,
85
  "learning_rate": 1.6009852216748768e-05,
86
+ "loss": 0.7817,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.34,
91
  "learning_rate": 1.7241379310344828e-05,
92
+ "loss": 0.7546,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.37,
97
  "learning_rate": 1.847290640394089e-05,
98
+ "loss": 0.7266,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.39,
103
  "learning_rate": 1.970443349753695e-05,
104
+ "loss": 0.7962,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.42,
109
  "learning_rate": 2.0935960591133006e-05,
110
+ "loss": 0.7822,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.44,
115
  "learning_rate": 2.2167487684729066e-05,
116
+ "loss": 0.8589,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.47,
121
  "learning_rate": 2.3399014778325123e-05,
122
+ "loss": 0.7298,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.49,
127
  "learning_rate": 2.4630541871921184e-05,
128
+ "loss": 0.8412,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.52,
133
  "learning_rate": 2.5862068965517244e-05,
134
+ "loss": 0.8004,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.54,
139
  "learning_rate": 2.70935960591133e-05,
140
+ "loss": 0.7673,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 0.57,
145
  "learning_rate": 2.8325123152709358e-05,
146
+ "loss": 0.7293,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 0.59,
151
  "learning_rate": 2.9556650246305422e-05,
152
+ "loss": 0.8035,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 0.61,
157
  "learning_rate": 3.078817733990148e-05,
158
+ "loss": 0.7597,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 0.64,
163
  "learning_rate": 3.2019704433497536e-05,
164
+ "loss": 0.8599,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 0.66,
169
  "learning_rate": 3.3251231527093596e-05,
170
+ "loss": 0.7831,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 0.69,
175
  "learning_rate": 3.4482758620689657e-05,
176
+ "loss": 0.7714,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 0.71,
181
  "learning_rate": 3.571428571428572e-05,
182
+ "loss": 0.8696,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 0.74,
187
  "learning_rate": 3.694581280788178e-05,
188
+ "loss": 0.8704,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 0.76,
193
  "learning_rate": 3.817733990147783e-05,
194
+ "loss": 0.7956,
195
  "step": 310
196
  },
197
  {
198
  "epoch": 0.79,
199
  "learning_rate": 3.94088669950739e-05,
200
+ "loss": 0.758,
201
  "step": 320
202
  },
203
  {
204
  "epoch": 0.81,
205
  "learning_rate": 4.064039408866995e-05,
206
+ "loss": 0.7565,
207
  "step": 330
208
  },
209
  {
210
  "epoch": 0.84,
211
  "learning_rate": 4.187192118226601e-05,
212
+ "loss": 0.8304,
213
  "step": 340
214
  },
215
  {
216
  "epoch": 0.86,
217
  "learning_rate": 4.3103448275862066e-05,
218
+ "loss": 0.8016,
219
  "step": 350
220
  },
221
  {
222
  "epoch": 0.89,
223
  "learning_rate": 4.433497536945813e-05,
224
+ "loss": 0.8437,
225
  "step": 360
226
  },
227
  {
228
  "epoch": 0.91,
229
  "learning_rate": 4.5566502463054186e-05,
230
+ "loss": 0.8352,
231
  "step": 370
232
  },
233
  {
234
  "epoch": 0.93,
235
  "learning_rate": 4.679802955665025e-05,
236
+ "loss": 0.7455,
237
  "step": 380
238
  },
239
  {
240
  "epoch": 0.96,
241
  "learning_rate": 4.802955665024631e-05,
242
+ "loss": 0.7252,
243
  "step": 390
244
  },
245
  {
246
  "epoch": 0.98,
247
  "learning_rate": 4.926108374384237e-05,
248
+ "loss": 0.7629,
249
  "step": 400
250
  },
251
  {
252
  "epoch": 1.0,
253
+ "eval_accuracy": 0.9312013828867761,
254
+ "eval_loss": 0.3326094150543213,
255
+ "eval_runtime": 27.2379,
256
+ "eval_samples_per_second": 212.388,
257
+ "eval_steps_per_second": 6.645,
258
  "step": 406
259
  },
260
  {
261
  "epoch": 1.01,
262
  "learning_rate": 4.9945265462506846e-05,
263
+ "loss": 0.836,
264
  "step": 410
265
  },
266
  {
267
  "epoch": 1.03,
268
  "learning_rate": 4.980842911877395e-05,
269
+ "loss": 0.7864,
270
  "step": 420
271
  },
272
  {
273
  "epoch": 1.06,
274
  "learning_rate": 4.9671592775041054e-05,
275
+ "loss": 0.8633,
276
  "step": 430
277
  },
278
  {
279
  "epoch": 1.08,
280
  "learning_rate": 4.953475643130816e-05,
281
+ "loss": 0.8069,
282
  "step": 440
283
  },
284
  {
285
  "epoch": 1.11,
286
  "learning_rate": 4.939792008757526e-05,
287
+ "loss": 0.7711,
288
  "step": 450
289
  },
290
  {
291
  "epoch": 1.13,
292
  "learning_rate": 4.926108374384237e-05,
293
+ "loss": 0.7662,
294
  "step": 460
295
  },
296
  {
297
  "epoch": 1.16,
298
  "learning_rate": 4.912424740010947e-05,
299
+ "loss": 0.7585,
300
  "step": 470
301
  },
302
  {
303
  "epoch": 1.18,
304
  "learning_rate": 4.8987411056376576e-05,
305
+ "loss": 0.8457,
306
  "step": 480
307
  },
308
  {
309
  "epoch": 1.2,
310
  "learning_rate": 4.885057471264368e-05,
311
+ "loss": 0.8136,
312
  "step": 490
313
  },
314
  {
315
  "epoch": 1.23,
316
  "learning_rate": 4.8713738368910785e-05,
317
+ "loss": 0.8722,
318
  "step": 500
319
  },
320
  {
321
  "epoch": 1.25,
322
  "learning_rate": 4.857690202517789e-05,
323
+ "loss": 0.7583,
324
  "step": 510
325
  },
326
  {
327
  "epoch": 1.28,
328
  "learning_rate": 4.8440065681444994e-05,
329
+ "loss": 0.8012,
330
  "step": 520
331
  },
332
  {
333
  "epoch": 1.3,
334
  "learning_rate": 4.83032293377121e-05,
335
+ "loss": 0.8165,
336
  "step": 530
337
  },
338
  {
339
  "epoch": 1.33,
340
  "learning_rate": 4.81663929939792e-05,
341
+ "loss": 0.7361,
342
  "step": 540
343
  },
344
  {
345
  "epoch": 1.35,
346
  "learning_rate": 4.802955665024631e-05,
347
+ "loss": 0.7531,
348
  "step": 550
349
  },
350
  {
351
  "epoch": 1.38,
352
  "learning_rate": 4.789272030651341e-05,
353
+ "loss": 0.823,
354
  "step": 560
355
  },
356
  {
357
  "epoch": 1.4,
358
  "learning_rate": 4.7755883962780516e-05,
359
+ "loss": 0.8301,
360
  "step": 570
361
  },
362
  {
363
  "epoch": 1.43,
364
  "learning_rate": 4.761904761904762e-05,
365
+ "loss": 0.7367,
366
  "step": 580
367
  },
368
  {
369
  "epoch": 1.45,
370
  "learning_rate": 4.7482211275314725e-05,
371
+ "loss": 0.8218,
372
  "step": 590
373
  },
374
  {
375
  "epoch": 1.48,
376
  "learning_rate": 4.734537493158183e-05,
377
+ "loss": 0.8237,
378
  "step": 600
379
  },
380
  {
381
  "epoch": 1.5,
382
  "learning_rate": 4.7208538587848934e-05,
383
+ "loss": 0.8238,
384
  "step": 610
385
  },
386
  {
387
  "epoch": 1.52,
388
  "learning_rate": 4.707170224411604e-05,
389
+ "loss": 0.7757,
390
  "step": 620
391
  },
392
  {
393
  "epoch": 1.55,
394
  "learning_rate": 4.693486590038315e-05,
395
+ "loss": 0.8156,
396
  "step": 630
397
  },
398
  {
399
  "epoch": 1.57,
400
  "learning_rate": 4.679802955665025e-05,
401
+ "loss": 0.8355,
402
  "step": 640
403
  },
404
  {
405
  "epoch": 1.6,
406
  "learning_rate": 4.666119321291735e-05,
407
+ "loss": 0.7258,
408
  "step": 650
409
  },
410
  {
411
  "epoch": 1.62,
412
  "learning_rate": 4.652435686918446e-05,
413
+ "loss": 0.7613,
414
  "step": 660
415
  },
416
  {
417
  "epoch": 1.65,
418
  "learning_rate": 4.638752052545156e-05,
419
+ "loss": 0.7365,
420
  "step": 670
421
  },
422
  {
423
  "epoch": 1.67,
424
  "learning_rate": 4.6250684181718664e-05,
425
+ "loss": 0.7489,
426
  "step": 680
427
  },
428
  {
429
  "epoch": 1.7,
430
  "learning_rate": 4.611384783798577e-05,
431
+ "loss": 0.781,
432
  "step": 690
433
  },
434
  {
435
  "epoch": 1.72,
436
  "learning_rate": 4.597701149425287e-05,
437
+ "loss": 0.8505,
438
  "step": 700
439
  },
440
  {
441
  "epoch": 1.75,
442
  "learning_rate": 4.5840175150519984e-05,
443
+ "loss": 0.7674,
444
  "step": 710
445
  },
446
  {
447
  "epoch": 1.77,
448
  "learning_rate": 4.570333880678708e-05,
449
+ "loss": 0.8244,
450
  "step": 720
451
  },
452
  {
453
  "epoch": 1.79,
454
  "learning_rate": 4.5566502463054186e-05,
455
+ "loss": 0.8095,
456
  "step": 730
457
  },
458
  {
459
  "epoch": 1.82,
460
  "learning_rate": 4.54296661193213e-05,
461
+ "loss": 0.8051,
462
  "step": 740
463
  },
464
  {
465
  "epoch": 1.84,
466
  "learning_rate": 4.5292829775588395e-05,
467
+ "loss": 0.7743,
468
  "step": 750
469
  },
470
  {
471
  "epoch": 1.87,
472
  "learning_rate": 4.5155993431855506e-05,
473
+ "loss": 0.8334,
474
  "step": 760
475
  },
476
  {
477
  "epoch": 1.89,
478
  "learning_rate": 4.501915708812261e-05,
479
+ "loss": 0.829,
480
  "step": 770
481
  },
482
  {
483
  "epoch": 1.92,
484
  "learning_rate": 4.488232074438971e-05,
485
+ "loss": 0.788,
486
  "step": 780
487
  },
488
  {
489
  "epoch": 1.94,
490
  "learning_rate": 4.474548440065682e-05,
491
+ "loss": 0.692,
492
  "step": 790
493
  },
494
  {
495
  "epoch": 1.97,
496
  "learning_rate": 4.460864805692392e-05,
497
+ "loss": 0.7329,
498
  "step": 800
499
  },
500
  {
501
  "epoch": 1.99,
502
  "learning_rate": 4.447181171319103e-05,
503
+ "loss": 0.8118,
504
  "step": 810
505
  },
506
  {
507
  "epoch": 2.0,
508
+ "eval_accuracy": 0.9450302506482282,
509
+ "eval_loss": 0.29584750533103943,
510
+ "eval_runtime": 27.0638,
511
+ "eval_samples_per_second": 213.754,
512
+ "eval_steps_per_second": 6.688,
513
  "step": 813
514
  },
515
  {
516
  "epoch": 2.02,
517
  "learning_rate": 4.433497536945813e-05,
518
+ "loss": 0.7741,
519
  "step": 820
520
  },
521
  {
522
  "epoch": 2.04,
523
  "learning_rate": 4.419813902572523e-05,
524
+ "loss": 0.814,
525
  "step": 830
526
  },
527
  {
528
  "epoch": 2.07,
529
  "learning_rate": 4.406130268199234e-05,
530
+ "loss": 0.8301,
531
  "step": 840
532
  },
533
  {
534
  "epoch": 2.09,
535
  "learning_rate": 4.3924466338259446e-05,
536
+ "loss": 0.789,
537
  "step": 850
538
  },
539
  {
540
  "epoch": 2.11,
541
  "learning_rate": 4.3787629994526544e-05,
542
+ "loss": 0.7549,
543
  "step": 860
544
  },
545
  {
546
  "epoch": 2.14,
547
  "learning_rate": 4.3650793650793655e-05,
548
+ "loss": 0.6721,
549
  "step": 870
550
  },
551
  {
552
  "epoch": 2.16,
553
  "learning_rate": 4.351395730706076e-05,
554
+ "loss": 0.7396,
555
  "step": 880
556
  },
557
  {
558
  "epoch": 2.19,
559
  "learning_rate": 4.3377120963327864e-05,
560
+ "loss": 0.6782,
561
  "step": 890
562
  },
563
  {
564
  "epoch": 2.21,
565
  "learning_rate": 4.324028461959497e-05,
566
+ "loss": 0.7289,
567
  "step": 900
568
  },
569
  {
570
  "epoch": 2.24,
571
  "learning_rate": 4.3103448275862066e-05,
572
+ "loss": 0.712,
573
  "step": 910
574
  },
575
  {
576
  "epoch": 2.26,
577
  "learning_rate": 4.296661193212918e-05,
578
+ "loss": 0.8466,
579
  "step": 920
580
  },
581
  {
582
  "epoch": 2.29,
583
  "learning_rate": 4.282977558839628e-05,
584
+ "loss": 0.8502,
585
  "step": 930
586
  },
587
  {
588
  "epoch": 2.31,
589
  "learning_rate": 4.2692939244663386e-05,
590
+ "loss": 0.6847,
591
  "step": 940
592
  },
593
  {
594
  "epoch": 2.34,
595
  "learning_rate": 4.255610290093049e-05,
596
+ "loss": 0.8097,
597
  "step": 950
598
  },
599
  {
600
  "epoch": 2.36,
601
  "learning_rate": 4.2419266557197594e-05,
602
+ "loss": 0.709,
603
  "step": 960
604
  },
605
  {
606
  "epoch": 2.38,
607
  "learning_rate": 4.22824302134647e-05,
608
+ "loss": 0.665,
609
  "step": 970
610
  },
611
  {
612
  "epoch": 2.41,
613
  "learning_rate": 4.21455938697318e-05,
614
+ "loss": 0.6726,
615
  "step": 980
616
  },
617
  {
618
  "epoch": 2.43,
619
  "learning_rate": 4.200875752599891e-05,
620
+ "loss": 0.7391,
621
  "step": 990
622
  },
623
  {
624
  "epoch": 2.46,
625
  "learning_rate": 4.187192118226601e-05,
626
+ "loss": 0.7706,
627
  "step": 1000
628
  },
629
  {
630
  "epoch": 2.48,
631
  "learning_rate": 4.1735084838533116e-05,
632
+ "loss": 0.8454,
633
  "step": 1010
634
  },
635
  {
636
  "epoch": 2.51,
637
  "learning_rate": 4.159824849480022e-05,
638
+ "loss": 0.7661,
639
  "step": 1020
640
  },
641
  {
642
  "epoch": 2.53,
643
  "learning_rate": 4.1461412151067325e-05,
644
+ "loss": 0.7454,
645
  "step": 1030
646
  },
647
  {
648
  "epoch": 2.56,
649
  "learning_rate": 4.132457580733443e-05,
650
+ "loss": 0.6551,
651
  "step": 1040
652
  },
653
  {
654
  "epoch": 2.58,
655
  "learning_rate": 4.1187739463601534e-05,
656
+ "loss": 0.7453,
657
  "step": 1050
658
  },
659
  {
660
  "epoch": 2.61,
661
  "learning_rate": 4.105090311986864e-05,
662
+ "loss": 0.7144,
663
  "step": 1060
664
  },
665
  {
666
  "epoch": 2.63,
667
  "learning_rate": 4.091406677613574e-05,
668
+ "loss": 0.734,
669
  "step": 1070
670
  },
671
  {
672
  "epoch": 2.66,
673
  "learning_rate": 4.077723043240285e-05,
674
+ "loss": 0.7234,
675
  "step": 1080
676
  },
677
  {
678
  "epoch": 2.68,
679
  "learning_rate": 4.064039408866995e-05,
680
+ "loss": 0.7665,
681
  "step": 1090
682
  },
683
  {
684
  "epoch": 2.7,
685
  "learning_rate": 4.050355774493706e-05,
686
+ "loss": 0.6977,
687
  "step": 1100
688
  },
689
  {
690
  "epoch": 2.73,
691
  "learning_rate": 4.036672140120416e-05,
692
+ "loss": 0.7377,
693
  "step": 1110
694
  },
695
  {
696
  "epoch": 2.75,
697
  "learning_rate": 4.0229885057471265e-05,
698
+ "loss": 0.7405,
699
  "step": 1120
700
  },
701
  {
702
  "epoch": 2.78,
703
  "learning_rate": 4.009304871373837e-05,
704
+ "loss": 0.7858,
705
  "step": 1130
706
  },
707
  {
708
  "epoch": 2.8,
709
  "learning_rate": 3.9956212370005474e-05,
710
+ "loss": 0.6955,
711
  "step": 1140
712
  },
713
  {
714
  "epoch": 2.83,
715
  "learning_rate": 3.981937602627258e-05,
716
+ "loss": 0.8183,
717
  "step": 1150
718
  },
719
  {
720
  "epoch": 2.85,
721
  "learning_rate": 3.968253968253968e-05,
722
+ "loss": 0.7564,
723
  "step": 1160
724
  },
725
  {
726
  "epoch": 2.88,
727
  "learning_rate": 3.954570333880679e-05,
728
+ "loss": 0.7048,
729
  "step": 1170
730
  },
731
  {
732
  "epoch": 2.9,
733
  "learning_rate": 3.94088669950739e-05,
734
+ "loss": 0.7414,
735
  "step": 1180
736
  },
737
  {
738
  "epoch": 2.93,
739
  "learning_rate": 3.9272030651340996e-05,
740
+ "loss": 0.7146,
741
  "step": 1190
742
  },
743
  {
744
  "epoch": 2.95,
745
  "learning_rate": 3.91351943076081e-05,
746
+ "loss": 0.7579,
747
  "step": 1200
748
  },
749
  {
750
  "epoch": 2.97,
751
  "learning_rate": 3.899835796387521e-05,
752
+ "loss": 0.7648,
753
  "step": 1210
754
  },
755
  {
756
  "epoch": 3.0,
757
  "learning_rate": 3.886152162014231e-05,
758
+ "loss": 0.7189,
759
  "step": 1220
760
  },
761
  {
762
  "epoch": 3.0,
763
+ "eval_accuracy": 0.9515989628349178,
764
+ "eval_loss": 0.25021520256996155,
765
+ "eval_runtime": 28.0749,
766
+ "eval_samples_per_second": 206.056,
767
+ "eval_steps_per_second": 6.447,
768
  "step": 1220
769
  },
770
  {
771
  "epoch": 3.02,
772
  "learning_rate": 3.872468527640942e-05,
773
+ "loss": 0.748,
774
  "step": 1230
775
  },
776
  {
777
  "epoch": 3.05,
778
  "learning_rate": 3.858784893267652e-05,
779
+ "loss": 0.7554,
780
  "step": 1240
781
  },
782
  {
783
  "epoch": 3.07,
784
  "learning_rate": 3.845101258894362e-05,
785
+ "loss": 0.7256,
786
  "step": 1250
787
  },
788
  {
789
  "epoch": 3.1,
790
  "learning_rate": 3.831417624521073e-05,
791
+ "loss": 0.6269,
792
  "step": 1260
793
  },
794
  {
795
  "epoch": 3.12,
796
  "learning_rate": 3.817733990147783e-05,
797
+ "loss": 0.6848,
798
  "step": 1270
799
  },
800
  {
801
  "epoch": 3.15,
802
  "learning_rate": 3.8040503557744935e-05,
803
+ "loss": 0.7929,
804
  "step": 1280
805
  },
806
  {
807
  "epoch": 3.17,
808
  "learning_rate": 3.7903667214012047e-05,
809
+ "loss": 0.7091,
810
  "step": 1290
811
  },
812
  {
813
  "epoch": 3.2,
814
  "learning_rate": 3.7766830870279144e-05,
815
+ "loss": 0.7314,
816
  "step": 1300
817
  },
818
  {
819
  "epoch": 3.22,
820
  "learning_rate": 3.7629994526546255e-05,
821
+ "loss": 0.6781,
822
  "step": 1310
823
  },
824
  {
825
  "epoch": 3.25,
826
  "learning_rate": 3.749315818281336e-05,
827
+ "loss": 0.6571,
828
  "step": 1320
829
  },
830
  {
831
  "epoch": 3.27,
832
  "learning_rate": 3.735632183908046e-05,
833
+ "loss": 0.6719,
834
  "step": 1330
835
  },
836
  {
837
  "epoch": 3.29,
838
  "learning_rate": 3.721948549534757e-05,
839
+ "loss": 0.7128,
840
  "step": 1340
841
  },
842
  {
843
  "epoch": 3.32,
844
  "learning_rate": 3.7082649151614666e-05,
845
+ "loss": 0.6949,
846
  "step": 1350
847
  },
848
  {
849
  "epoch": 3.34,
850
  "learning_rate": 3.694581280788178e-05,
851
+ "loss": 0.6583,
852
  "step": 1360
853
  },
854
  {
855
  "epoch": 3.37,
856
  "learning_rate": 3.680897646414888e-05,
857
+ "loss": 0.7373,
858
  "step": 1370
859
  },
860
  {
861
  "epoch": 3.39,
862
  "learning_rate": 3.667214012041598e-05,
863
+ "loss": 0.707,
864
  "step": 1380
865
  },
866
  {
867
  "epoch": 3.42,
868
  "learning_rate": 3.653530377668309e-05,
869
+ "loss": 0.715,
870
  "step": 1390
871
  },
872
  {
873
  "epoch": 3.44,
874
  "learning_rate": 3.6398467432950195e-05,
875
+ "loss": 0.6595,
876
  "step": 1400
877
  },
878
  {
879
  "epoch": 3.47,
880
  "learning_rate": 3.62616310892173e-05,
881
+ "loss": 0.6768,
882
  "step": 1410
883
  },
884
  {
885
  "epoch": 3.49,
886
  "learning_rate": 3.6124794745484404e-05,
887
+ "loss": 0.6416,
888
  "step": 1420
889
  },
890
  {
891
  "epoch": 3.52,
892
  "learning_rate": 3.598795840175151e-05,
893
+ "loss": 0.8161,
894
  "step": 1430
895
  },
896
  {
897
  "epoch": 3.54,
898
  "learning_rate": 3.585112205801861e-05,
899
+ "loss": 0.7023,
900
  "step": 1440
901
  },
902
  {
903
  "epoch": 3.56,
904
  "learning_rate": 3.571428571428572e-05,
905
+ "loss": 0.6853,
906
  "step": 1450
907
  },
908
  {
909
  "epoch": 3.59,
910
  "learning_rate": 3.5577449370552815e-05,
911
+ "loss": 0.7589,
912
  "step": 1460
913
  },
914
  {
915
  "epoch": 3.61,
916
  "learning_rate": 3.5440613026819926e-05,
917
+ "loss": 0.6967,
918
  "step": 1470
919
  },
920
  {
921
  "epoch": 3.64,
922
  "learning_rate": 3.530377668308703e-05,
923
+ "loss": 0.7275,
924
  "step": 1480
925
  },
926
  {
927
  "epoch": 3.66,
928
  "learning_rate": 3.5166940339354135e-05,
929
+ "loss": 0.7026,
930
  "step": 1490
931
  },
932
  {
933
  "epoch": 3.69,
934
  "learning_rate": 3.503010399562124e-05,
935
+ "loss": 0.6894,
936
  "step": 1500
937
  },
938
  {
939
  "epoch": 3.71,
940
  "learning_rate": 3.489326765188834e-05,
941
+ "loss": 0.6976,
942
  "step": 1510
943
  },
944
  {
945
  "epoch": 3.74,
946
  "learning_rate": 3.475643130815545e-05,
947
+ "loss": 0.6837,
948
  "step": 1520
949
  },
950
  {
951
  "epoch": 3.76,
952
  "learning_rate": 3.461959496442255e-05,
953
+ "loss": 0.6921,
954
  "step": 1530
955
  },
956
  {
957
  "epoch": 3.79,
958
  "learning_rate": 3.4482758620689657e-05,
959
+ "loss": 0.5885,
960
  "step": 1540
961
  },
962
  {
963
  "epoch": 3.81,
964
  "learning_rate": 3.434592227695676e-05,
965
+ "loss": 0.7013,
966
  "step": 1550
967
  },
968
  {
969
  "epoch": 3.84,
970
  "learning_rate": 3.4209085933223865e-05,
971
+ "loss": 0.6954,
972
  "step": 1560
973
  },
974
  {
975
  "epoch": 3.86,
976
  "learning_rate": 3.407224958949097e-05,
977
+ "loss": 0.7609,
978
  "step": 1570
979
  },
980
  {
981
  "epoch": 3.88,
982
  "learning_rate": 3.3935413245758074e-05,
983
+ "loss": 0.7052,
984
  "step": 1580
985
  },
986
  {
987
  "epoch": 3.91,
988
  "learning_rate": 3.379857690202518e-05,
989
+ "loss": 0.6929,
990
  "step": 1590
991
  },
992
  {
993
  "epoch": 3.93,
994
  "learning_rate": 3.366174055829228e-05,
995
+ "loss": 0.6814,
996
  "step": 1600
997
  },
998
  {
999
  "epoch": 3.96,
1000
  "learning_rate": 3.352490421455939e-05,
1001
+ "loss": 0.7084,
1002
  "step": 1610
1003
  },
1004
  {
1005
  "epoch": 3.98,
1006
  "learning_rate": 3.338806787082649e-05,
1007
+ "loss": 0.7529,
1008
  "step": 1620
1009
  },
1010
  {
1011
  "epoch": 4.0,
1012
+ "eval_accuracy": 0.9566119273984443,
1013
+ "eval_loss": 0.2300640344619751,
1014
+ "eval_runtime": 27.261,
1015
+ "eval_samples_per_second": 212.208,
1016
+ "eval_steps_per_second": 6.64,
1017
  "step": 1627
1018
  },
1019
  {
1020
  "epoch": 4.01,
1021
  "learning_rate": 3.3251231527093596e-05,
1022
+ "loss": 0.6286,
1023
  "step": 1630
1024
  },
1025
  {
1026
  "epoch": 4.03,
1027
  "learning_rate": 3.31143951833607e-05,
1028
+ "loss": 0.7495,
1029
  "step": 1640
1030
  },
1031
  {
1032
  "epoch": 4.06,
1033
  "learning_rate": 3.297755883962781e-05,
1034
+ "loss": 0.7085,
1035
  "step": 1650
1036
  },
1037
  {
1038
  "epoch": 4.08,
1039
  "learning_rate": 3.284072249589491e-05,
1040
+ "loss": 0.72,
1041
  "step": 1660
1042
  },
1043
  {
1044
  "epoch": 4.11,
1045
  "learning_rate": 3.2703886152162014e-05,
1046
+ "loss": 0.6716,
1047
  "step": 1670
1048
  },
1049
  {
1050
  "epoch": 4.13,
1051
  "learning_rate": 3.256704980842912e-05,
1052
+ "loss": 0.641,
1053
  "step": 1680
1054
  },
1055
  {
1056
  "epoch": 4.15,
1057
  "learning_rate": 3.243021346469622e-05,
1058
+ "loss": 0.6338,
1059
  "step": 1690
1060
  },
1061
  {
1062
  "epoch": 4.18,
1063
  "learning_rate": 3.2293377120963334e-05,
1064
+ "loss": 0.5716,
1065
  "step": 1700
1066
  },
1067
  {
1068
  "epoch": 4.2,
1069
  "learning_rate": 3.215654077723043e-05,
1070
+ "loss": 0.6536,
1071
  "step": 1710
1072
  },
1073
  {
1074
  "epoch": 4.23,
1075
  "learning_rate": 3.2019704433497536e-05,
1076
+ "loss": 0.6454,
1077
  "step": 1720
1078
  },
1079
  {
1080
  "epoch": 4.25,
1081
  "learning_rate": 3.188286808976465e-05,
1082
+ "loss": 0.6392,
1083
  "step": 1730
1084
  },
1085
  {
1086
  "epoch": 4.28,
1087
  "learning_rate": 3.1746031746031745e-05,
1088
+ "loss": 0.6275,
1089
  "step": 1740
1090
  },
1091
  {
1092
  "epoch": 4.3,
1093
  "learning_rate": 3.160919540229885e-05,
1094
+ "loss": 0.6634,
1095
  "step": 1750
1096
  },
1097
  {
1098
  "epoch": 4.33,
1099
  "learning_rate": 3.147235905856596e-05,
1100
+ "loss": 0.6603,
1101
  "step": 1760
1102
  },
1103
  {
1104
  "epoch": 4.35,
1105
  "learning_rate": 3.133552271483306e-05,
1106
+ "loss": 0.704,
1107
  "step": 1770
1108
  },
1109
  {
1110
  "epoch": 4.38,
1111
  "learning_rate": 3.119868637110017e-05,
1112
+ "loss": 0.7157,
1113
  "step": 1780
1114
  },
1115
  {
1116
  "epoch": 4.4,
1117
  "learning_rate": 3.1061850027367273e-05,
1118
+ "loss": 0.6142,
1119
  "step": 1790
1120
  },
1121
  {
1122
  "epoch": 4.43,
1123
  "learning_rate": 3.092501368363437e-05,
1124
+ "loss": 0.5772,
1125
  "step": 1800
1126
  },
1127
  {
1128
  "epoch": 4.45,
1129
  "learning_rate": 3.078817733990148e-05,
1130
+ "loss": 0.6924,
1131
  "step": 1810
1132
  },
1133
  {
1134
  "epoch": 4.47,
1135
  "learning_rate": 3.065134099616858e-05,
1136
+ "loss": 0.6931,
1137
  "step": 1820
1138
  },
1139
  {
1140
  "epoch": 4.5,
1141
  "learning_rate": 3.0514504652435688e-05,
1142
+ "loss": 0.6155,
1143
  "step": 1830
1144
  },
1145
  {
1146
  "epoch": 4.52,
1147
  "learning_rate": 3.0377668308702795e-05,
1148
+ "loss": 0.6732,
1149
  "step": 1840
1150
  },
1151
  {
1152
  "epoch": 4.55,
1153
  "learning_rate": 3.0240831964969896e-05,
1154
+ "loss": 0.6537,
1155
  "step": 1850
1156
  },
1157
  {
1158
  "epoch": 4.57,
1159
  "learning_rate": 3.0103995621237e-05,
1160
+ "loss": 0.6704,
1161
  "step": 1860
1162
  },
1163
  {
1164
  "epoch": 4.6,
1165
  "learning_rate": 2.996715927750411e-05,
1166
+ "loss": 0.6187,
1167
  "step": 1870
1168
  },
1169
  {
1170
  "epoch": 4.62,
1171
  "learning_rate": 2.983032293377121e-05,
1172
+ "loss": 0.6287,
1173
  "step": 1880
1174
  },
1175
  {
1176
  "epoch": 4.65,
1177
  "learning_rate": 2.9693486590038317e-05,
1178
+ "loss": 0.5647,
1179
  "step": 1890
1180
  },
1181
  {
1182
  "epoch": 4.67,
1183
  "learning_rate": 2.9556650246305422e-05,
1184
+ "loss": 0.685,
1185
  "step": 1900
1186
  },
1187
  {
1188
  "epoch": 4.7,
1189
  "learning_rate": 2.9419813902572523e-05,
1190
+ "loss": 0.5978,
1191
  "step": 1910
1192
  },
1193
  {
1194
  "epoch": 4.72,
1195
  "learning_rate": 2.928297755883963e-05,
1196
+ "loss": 0.608,
1197
  "step": 1920
1198
  },
1199
  {
1200
  "epoch": 4.74,
1201
  "learning_rate": 2.914614121510673e-05,
1202
+ "loss": 0.6599,
1203
  "step": 1930
1204
  },
1205
  {
1206
  "epoch": 4.77,
1207
  "learning_rate": 2.900930487137384e-05,
1208
+ "loss": 0.6674,
1209
  "step": 1940
1210
  },
1211
  {
1212
  "epoch": 4.79,
1213
  "learning_rate": 2.8872468527640944e-05,
1214
+ "loss": 0.6663,
1215
  "step": 1950
1216
  },
1217
  {
1218
  "epoch": 4.82,
1219
  "learning_rate": 2.8735632183908045e-05,
1220
+ "loss": 0.6524,
1221
  "step": 1960
1222
  },
1223
  {
1224
  "epoch": 4.84,
1225
  "learning_rate": 2.8598795840175153e-05,
1226
+ "loss": 0.6612,
1227
  "step": 1970
1228
  },
1229
  {
1230
  "epoch": 4.87,
1231
  "learning_rate": 2.8461959496442257e-05,
1232
+ "loss": 0.7027,
1233
  "step": 1980
1234
  },
1235
  {
1236
  "epoch": 4.89,
1237
  "learning_rate": 2.8325123152709358e-05,
1238
+ "loss": 0.6129,
1239
  "step": 1990
1240
  },
1241
  {
1242
  "epoch": 4.92,
1243
  "learning_rate": 2.8188286808976466e-05,
1244
+ "loss": 0.7099,
1245
  "step": 2000
1246
  },
1247
  {
1248
  "epoch": 4.94,
1249
  "learning_rate": 2.8051450465243574e-05,
1250
+ "loss": 0.5781,
1251
  "step": 2010
1252
  },
1253
  {
1254
  "epoch": 4.97,
1255
  "learning_rate": 2.7914614121510675e-05,
1256
+ "loss": 0.6978,
1257
  "step": 2020
1258
  },
1259
  {
1260
  "epoch": 4.99,
1261
  "learning_rate": 2.777777777777778e-05,
1262
+ "loss": 0.6746,
1263
  "step": 2030
1264
  },
1265
  {
1266
  "epoch": 5.0,
1267
+ "eval_accuracy": 0.957476231633535,
1268
+ "eval_loss": 0.21456871926784515,
1269
+ "eval_runtime": 27.8285,
1270
+ "eval_samples_per_second": 207.881,
1271
+ "eval_steps_per_second": 6.504,
1272
  "step": 2033
1273
  },
1274
  {
1275
  "epoch": 5.02,
1276
  "learning_rate": 2.764094143404488e-05,
1277
+ "loss": 0.6939,
1278
  "step": 2040
1279
  },
1280
  {
1281
  "epoch": 5.04,
1282
  "learning_rate": 2.7504105090311988e-05,
1283
+ "loss": 0.5992,
1284
  "step": 2050
1285
  },
1286
  {
1287
  "epoch": 5.06,
1288
  "learning_rate": 2.7367268746579096e-05,
1289
+ "loss": 0.6252,
1290
  "step": 2060
1291
  },
1292
  {
1293
  "epoch": 5.09,
1294
  "learning_rate": 2.7230432402846197e-05,
1295
+ "loss": 0.6173,
1296
  "step": 2070
1297
  },
1298
  {
1299
  "epoch": 5.11,
1300
  "learning_rate": 2.70935960591133e-05,
1301
+ "loss": 0.6292,
1302
  "step": 2080
1303
  },
1304
  {
1305
  "epoch": 5.14,
1306
  "learning_rate": 2.695675971538041e-05,
1307
+ "loss": 0.6444,
1308
  "step": 2090
1309
  },
1310
  {
1311
  "epoch": 5.16,
1312
  "learning_rate": 2.681992337164751e-05,
1313
+ "loss": 0.6745,
1314
  "step": 2100
1315
  },
1316
  {
1317
  "epoch": 5.19,
1318
  "learning_rate": 2.6683087027914618e-05,
1319
+ "loss": 0.6157,
1320
  "step": 2110
1321
  },
1322
  {
1323
  "epoch": 5.21,
1324
  "learning_rate": 2.6546250684181722e-05,
1325
+ "loss": 0.5988,
1326
  "step": 2120
1327
  },
1328
  {
1329
  "epoch": 5.24,
1330
  "learning_rate": 2.6409414340448823e-05,
1331
+ "loss": 0.6707,
1332
  "step": 2130
1333
  },
1334
  {
1335
  "epoch": 5.26,
1336
  "learning_rate": 2.627257799671593e-05,
1337
+ "loss": 0.5951,
1338
  "step": 2140
1339
  },
1340
  {
1341
  "epoch": 5.29,
1342
  "learning_rate": 2.6135741652983032e-05,
1343
+ "loss": 0.5863,
1344
  "step": 2150
1345
  },
1346
  {
1347
  "epoch": 5.31,
1348
  "learning_rate": 2.5998905309250136e-05,
1349
+ "loss": 0.6182,
1350
  "step": 2160
1351
  },
1352
  {
1353
  "epoch": 5.33,
1354
  "learning_rate": 2.5862068965517244e-05,
1355
+ "loss": 0.6163,
1356
  "step": 2170
1357
  },
1358
  {
1359
  "epoch": 5.36,
1360
  "learning_rate": 2.5725232621784345e-05,
1361
+ "loss": 0.6525,
1362
  "step": 2180
1363
  },
1364
  {
1365
  "epoch": 5.38,
1366
  "learning_rate": 2.5588396278051453e-05,
1367
+ "loss": 0.6545,
1368
  "step": 2190
1369
  },
1370
  {
1371
  "epoch": 5.41,
1372
  "learning_rate": 2.5451559934318557e-05,
1373
+ "loss": 0.6883,
1374
  "step": 2200
1375
  },
1376
  {
1377
  "epoch": 5.43,
1378
  "learning_rate": 2.531472359058566e-05,
1379
+ "loss": 0.6644,
1380
  "step": 2210
1381
  },
1382
  {
1383
  "epoch": 5.46,
1384
  "learning_rate": 2.5177887246852766e-05,
1385
+ "loss": 0.641,
1386
  "step": 2220
1387
  },
1388
  {
1389
  "epoch": 5.48,
1390
  "learning_rate": 2.5041050903119874e-05,
1391
+ "loss": 0.6029,
1392
  "step": 2230
1393
  },
1394
  {
1395
  "epoch": 5.51,
1396
  "learning_rate": 2.4904214559386975e-05,
1397
+ "loss": 0.6049,
1398
  "step": 2240
1399
  },
1400
  {
1401
  "epoch": 5.53,
1402
  "learning_rate": 2.476737821565408e-05,
1403
+ "loss": 0.6282,
1404
  "step": 2250
1405
  },
1406
  {
1407
  "epoch": 5.56,
1408
  "learning_rate": 2.4630541871921184e-05,
1409
+ "loss": 0.5836,
1410
  "step": 2260
1411
  },
1412
  {
1413
  "epoch": 5.58,
1414
  "learning_rate": 2.4493705528188288e-05,
1415
+ "loss": 0.6288,
1416
  "step": 2270
1417
  },
1418
  {
1419
  "epoch": 5.61,
1420
  "learning_rate": 2.4356869184455393e-05,
1421
+ "loss": 0.5648,
1422
  "step": 2280
1423
  },
1424
  {
1425
  "epoch": 5.63,
1426
  "learning_rate": 2.4220032840722497e-05,
1427
+ "loss": 0.5689,
1428
  "step": 2290
1429
  },
1430
  {
1431
  "epoch": 5.65,
1432
  "learning_rate": 2.40831964969896e-05,
1433
+ "loss": 0.6038,
1434
  "step": 2300
1435
  },
1436
  {
1437
  "epoch": 5.68,
1438
  "learning_rate": 2.3946360153256706e-05,
1439
+ "loss": 0.5866,
1440
  "step": 2310
1441
  },
1442
  {
1443
  "epoch": 5.7,
1444
  "learning_rate": 2.380952380952381e-05,
1445
+ "loss": 0.6283,
1446
  "step": 2320
1447
  },
1448
  {
1449
  "epoch": 5.73,
1450
  "learning_rate": 2.3672687465790915e-05,
1451
+ "loss": 0.6445,
1452
  "step": 2330
1453
  },
1454
  {
1455
  "epoch": 5.75,
1456
  "learning_rate": 2.353585112205802e-05,
1457
+ "loss": 0.6545,
1458
  "step": 2340
1459
  },
1460
  {
1461
  "epoch": 5.78,
1462
  "learning_rate": 2.3399014778325123e-05,
1463
+ "loss": 0.6394,
1464
  "step": 2350
1465
  },
1466
  {
1467
  "epoch": 5.8,
1468
  "learning_rate": 2.326217843459223e-05,
1469
+ "loss": 0.5928,
1470
  "step": 2360
1471
  },
1472
  {
1473
  "epoch": 5.83,
1474
  "learning_rate": 2.3125342090859332e-05,
1475
+ "loss": 0.6121,
1476
  "step": 2370
1477
  },
1478
  {
1479
  "epoch": 5.85,
1480
  "learning_rate": 2.2988505747126437e-05,
1481
+ "loss": 0.6055,
1482
  "step": 2380
1483
  },
1484
  {
1485
  "epoch": 5.88,
1486
  "learning_rate": 2.285166940339354e-05,
1487
+ "loss": 0.5902,
1488
  "step": 2390
1489
  },
1490
  {
1491
  "epoch": 5.9,
1492
  "learning_rate": 2.271483305966065e-05,
1493
+ "loss": 0.5764,
1494
  "step": 2400
1495
  },
1496
  {
1497
  "epoch": 5.93,
1498
  "learning_rate": 2.2577996715927753e-05,
1499
+ "loss": 0.5722,
1500
  "step": 2410
1501
  },
1502
  {
1503
  "epoch": 5.95,
1504
  "learning_rate": 2.2441160372194854e-05,
1505
+ "loss": 0.5832,
1506
  "step": 2420
1507
  },
1508
  {
1509
  "epoch": 5.97,
1510
  "learning_rate": 2.230432402846196e-05,
1511
+ "loss": 0.5632,
1512
  "step": 2430
1513
  },
1514
  {
1515
  "epoch": 6.0,
1516
  "learning_rate": 2.2167487684729066e-05,
1517
+ "loss": 0.546,
1518
  "step": 2440
1519
  },
1520
  {
1521
  "epoch": 6.0,
1522
+ "eval_accuracy": 0.9609334485738981,
1523
+ "eval_loss": 0.2027408331632614,
1524
+ "eval_runtime": 27.561,
1525
+ "eval_samples_per_second": 209.898,
1526
+ "eval_steps_per_second": 6.567,
1527
  "step": 2440
1528
  },
1529
  {
1530
  "epoch": 6.02,
1531
  "learning_rate": 2.203065134099617e-05,
1532
+ "loss": 0.5745,
1533
  "step": 2450
1534
  },
1535
  {
1536
  "epoch": 6.05,
1537
  "learning_rate": 2.1893814997263272e-05,
1538
+ "loss": 0.5959,
1539
  "step": 2460
1540
  },
1541
  {
1542
  "epoch": 6.07,
1543
  "learning_rate": 2.175697865353038e-05,
1544
+ "loss": 0.6355,
1545
  "step": 2470
1546
  },
1547
  {
1548
  "epoch": 6.1,
1549
  "learning_rate": 2.1620142309797484e-05,
1550
+ "loss": 0.5944,
1551
  "step": 2480
1552
  },
1553
  {
1554
  "epoch": 6.12,
1555
  "learning_rate": 2.148330596606459e-05,
1556
+ "loss": 0.5942,
1557
  "step": 2490
1558
  },
1559
  {
1560
  "epoch": 6.15,
1561
  "learning_rate": 2.1346469622331693e-05,
1562
+ "loss": 0.5985,
1563
  "step": 2500
1564
  },
1565
  {
1566
  "epoch": 6.17,
1567
  "learning_rate": 2.1209633278598797e-05,
1568
+ "loss": 0.5785,
1569
  "step": 2510
1570
  },
1571
  {
1572
  "epoch": 6.2,
1573
  "learning_rate": 2.10727969348659e-05,
1574
+ "loss": 0.5717,
1575
  "step": 2520
1576
  },
1577
  {
1578
  "epoch": 6.22,
1579
  "learning_rate": 2.0935960591133006e-05,
1580
+ "loss": 0.6025,
1581
  "step": 2530
1582
  },
1583
  {
1584
  "epoch": 6.24,
1585
  "learning_rate": 2.079912424740011e-05,
1586
+ "loss": 0.5668,
1587
  "step": 2540
1588
  },
1589
  {
1590
  "epoch": 6.27,
1591
  "learning_rate": 2.0662287903667215e-05,
1592
+ "loss": 0.5703,
1593
  "step": 2550
1594
  },
1595
  {
1596
  "epoch": 6.29,
1597
  "learning_rate": 2.052545155993432e-05,
1598
+ "loss": 0.6261,
1599
  "step": 2560
1600
  },
1601
  {
1602
  "epoch": 6.32,
1603
  "learning_rate": 2.0388615216201424e-05,
1604
+ "loss": 0.4868,
1605
  "step": 2570
1606
  },
1607
  {
1608
  "epoch": 6.34,
1609
  "learning_rate": 2.025177887246853e-05,
1610
+ "loss": 0.6238,
1611
  "step": 2580
1612
  },
1613
  {
1614
  "epoch": 6.37,
1615
  "learning_rate": 2.0114942528735632e-05,
1616
+ "loss": 0.5533,
1617
  "step": 2590
1618
  },
1619
  {
1620
  "epoch": 6.39,
1621
  "learning_rate": 1.9978106185002737e-05,
1622
+ "loss": 0.5589,
1623
  "step": 2600
1624
  },
1625
  {
1626
  "epoch": 6.42,
1627
  "learning_rate": 1.984126984126984e-05,
1628
+ "loss": 0.6414,
1629
  "step": 2610
1630
  },
1631
  {
1632
  "epoch": 6.44,
1633
  "learning_rate": 1.970443349753695e-05,
1634
+ "loss": 0.5837,
1635
  "step": 2620
1636
  },
1637
  {
1638
  "epoch": 6.47,
1639
  "learning_rate": 1.956759715380405e-05,
1640
+ "loss": 0.5744,
1641
  "step": 2630
1642
  },
1643
  {
1644
  "epoch": 6.49,
1645
  "learning_rate": 1.9430760810071154e-05,
1646
+ "loss": 0.5979,
1647
  "step": 2640
1648
  },
1649
  {
1650
  "epoch": 6.52,
1651
  "learning_rate": 1.929392446633826e-05,
1652
+ "loss": 0.5642,
1653
  "step": 2650
1654
  },
1655
  {
1656
  "epoch": 6.54,
1657
  "learning_rate": 1.9157088122605367e-05,
1658
+ "loss": 0.508,
1659
  "step": 2660
1660
  },
1661
  {
1662
  "epoch": 6.56,
1663
  "learning_rate": 1.9020251778872468e-05,
1664
+ "loss": 0.6201,
1665
  "step": 2670
1666
  },
1667
  {
1668
  "epoch": 6.59,
1669
  "learning_rate": 1.8883415435139572e-05,
1670
+ "loss": 0.6078,
1671
  "step": 2680
1672
  },
1673
  {
1674
  "epoch": 6.61,
1675
  "learning_rate": 1.874657909140668e-05,
1676
+ "loss": 0.6139,
1677
  "step": 2690
1678
  },
1679
  {
1680
  "epoch": 6.64,
1681
  "learning_rate": 1.8609742747673784e-05,
1682
+ "loss": 0.5885,
1683
  "step": 2700
1684
  },
1685
  {
1686
  "epoch": 6.66,
1687
  "learning_rate": 1.847290640394089e-05,
1688
+ "loss": 0.5809,
1689
  "step": 2710
1690
  },
1691
  {
1692
  "epoch": 6.69,
1693
  "learning_rate": 1.833607006020799e-05,
1694
+ "loss": 0.5503,
1695
  "step": 2720
1696
  },
1697
  {
1698
  "epoch": 6.71,
1699
  "learning_rate": 1.8199233716475097e-05,
1700
+ "loss": 0.5675,
1701
  "step": 2730
1702
  },
1703
  {
1704
  "epoch": 6.74,
1705
  "learning_rate": 1.8062397372742202e-05,
1706
+ "loss": 0.554,
1707
  "step": 2740
1708
  },
1709
  {
1710
  "epoch": 6.76,
1711
  "learning_rate": 1.7925561029009306e-05,
1712
+ "loss": 0.6173,
1713
  "step": 2750
1714
  },
1715
  {
1716
  "epoch": 6.79,
1717
  "learning_rate": 1.7788724685276407e-05,
1718
+ "loss": 0.5588,
1719
  "step": 2760
1720
  },
1721
  {
1722
  "epoch": 6.81,
1723
  "learning_rate": 1.7651888341543515e-05,
1724
+ "loss": 0.5736,
1725
  "step": 2770
1726
  },
1727
  {
1728
  "epoch": 6.83,
1729
  "learning_rate": 1.751505199781062e-05,
1730
+ "loss": 0.5748,
1731
  "step": 2780
1732
  },
1733
  {
1734
  "epoch": 6.86,
1735
  "learning_rate": 1.7378215654077724e-05,
1736
+ "loss": 0.6228,
1737
  "step": 2790
1738
  },
1739
  {
1740
  "epoch": 6.88,
1741
  "learning_rate": 1.7241379310344828e-05,
1742
+ "loss": 0.635,
1743
  "step": 2800
1744
  },
1745
  {
1746
  "epoch": 6.91,
1747
  "learning_rate": 1.7104542966611933e-05,
1748
+ "loss": 0.5662,
1749
  "step": 2810
1750
  },
1751
  {
1752
  "epoch": 6.93,
1753
  "learning_rate": 1.6967706622879037e-05,
1754
+ "loss": 0.6354,
1755
  "step": 2820
1756
  },
1757
  {
1758
  "epoch": 6.96,
1759
  "learning_rate": 1.683087027914614e-05,
1760
+ "loss": 0.5635,
1761
  "step": 2830
1762
  },
1763
  {
1764
  "epoch": 6.98,
1765
  "learning_rate": 1.6694033935413246e-05,
1766
+ "loss": 0.5983,
1767
  "step": 2840
1768
  },
1769
  {
1770
  "epoch": 7.0,
1771
+ "eval_accuracy": 0.9640449438202248,
1772
+ "eval_loss": 0.19191542267799377,
1773
+ "eval_runtime": 27.0395,
1774
+ "eval_samples_per_second": 213.946,
1775
+ "eval_steps_per_second": 6.694,
1776
  "step": 2847
1777
  },
1778
  {
1779
  "epoch": 7.01,
1780
  "learning_rate": 1.655719759168035e-05,
1781
+ "loss": 0.5661,
1782
  "step": 2850
1783
  },
1784
  {
1785
  "epoch": 7.03,
1786
  "learning_rate": 1.6420361247947455e-05,
1787
+ "loss": 0.5409,
1788
  "step": 2860
1789
  },
1790
  {
1791
  "epoch": 7.06,
1792
  "learning_rate": 1.628352490421456e-05,
1793
+ "loss": 0.5863,
1794
  "step": 2870
1795
  },
1796
  {
1797
  "epoch": 7.08,
1798
  "learning_rate": 1.6146688560481667e-05,
1799
+ "loss": 0.5587,
1800
  "step": 2880
1801
  },
1802
  {
1803
  "epoch": 7.11,
1804
  "learning_rate": 1.6009852216748768e-05,
1805
+ "loss": 0.5921,
1806
  "step": 2890
1807
  },
1808
  {
1809
  "epoch": 7.13,
1810
  "learning_rate": 1.5873015873015872e-05,
1811
+ "loss": 0.5591,
1812
  "step": 2900
1813
  },
1814
  {
1815
  "epoch": 7.15,
1816
  "learning_rate": 1.573617952928298e-05,
1817
+ "loss": 0.6061,
1818
  "step": 2910
1819
  },
1820
  {
1821
  "epoch": 7.18,
1822
  "learning_rate": 1.5599343185550085e-05,
1823
+ "loss": 0.5408,
1824
  "step": 2920
1825
  },
1826
  {
1827
  "epoch": 7.2,
1828
  "learning_rate": 1.5462506841817186e-05,
1829
+ "loss": 0.5585,
1830
  "step": 2930
1831
  },
1832
  {
1833
  "epoch": 7.23,
1834
  "learning_rate": 1.532567049808429e-05,
1835
+ "loss": 0.5551,
1836
  "step": 2940
1837
  },
1838
  {
1839
  "epoch": 7.25,
1840
  "learning_rate": 1.5188834154351398e-05,
1841
+ "loss": 0.5976,
1842
  "step": 2950
1843
  },
1844
  {
1845
  "epoch": 7.28,
1846
  "learning_rate": 1.50519978106185e-05,
1847
+ "loss": 0.5359,
1848
  "step": 2960
1849
  },
1850
  {
1851
  "epoch": 7.3,
1852
  "learning_rate": 1.4915161466885605e-05,
1853
+ "loss": 0.5213,
1854
  "step": 2970
1855
  },
1856
  {
1857
  "epoch": 7.33,
1858
  "learning_rate": 1.4778325123152711e-05,
1859
+ "loss": 0.552,
1860
  "step": 2980
1861
  },
1862
  {
1863
  "epoch": 7.35,
1864
  "learning_rate": 1.4641488779419815e-05,
1865
+ "loss": 0.5372,
1866
  "step": 2990
1867
  },
1868
  {
1869
  "epoch": 7.38,
1870
  "learning_rate": 1.450465243568692e-05,
1871
+ "loss": 0.61,
1872
  "step": 3000
1873
  },
1874
  {
1875
  "epoch": 7.4,
1876
  "learning_rate": 1.4367816091954022e-05,
1877
+ "loss": 0.5742,
1878
  "step": 3010
1879
  },
1880
  {
1881
  "epoch": 7.42,
1882
  "learning_rate": 1.4230979748221129e-05,
1883
+ "loss": 0.5856,
1884
  "step": 3020
1885
  },
1886
  {
1887
  "epoch": 7.45,
1888
  "learning_rate": 1.4094143404488233e-05,
1889
+ "loss": 0.5193,
1890
  "step": 3030
1891
  },
1892
  {
1893
  "epoch": 7.47,
1894
  "learning_rate": 1.3957307060755337e-05,
1895
+ "loss": 0.5345,
1896
  "step": 3040
1897
  },
1898
  {
1899
  "epoch": 7.5,
1900
  "learning_rate": 1.382047071702244e-05,
1901
+ "loss": 0.5558,
1902
  "step": 3050
1903
  },
1904
  {
1905
  "epoch": 7.52,
1906
  "learning_rate": 1.3683634373289548e-05,
1907
+ "loss": 0.58,
1908
  "step": 3060
1909
  },
1910
  {
1911
  "epoch": 7.55,
1912
  "learning_rate": 1.354679802955665e-05,
1913
+ "loss": 0.5425,
1914
  "step": 3070
1915
  },
1916
  {
1917
  "epoch": 7.57,
1918
  "learning_rate": 1.3409961685823755e-05,
1919
+ "loss": 0.6588,
1920
  "step": 3080
1921
  },
1922
  {
1923
  "epoch": 7.6,
1924
  "learning_rate": 1.3273125342090861e-05,
1925
+ "loss": 0.5137,
1926
  "step": 3090
1927
  },
1928
  {
1929
  "epoch": 7.62,
1930
  "learning_rate": 1.3136288998357965e-05,
1931
+ "loss": 0.5975,
1932
  "step": 3100
1933
  },
1934
  {
1935
  "epoch": 7.65,
1936
  "learning_rate": 1.2999452654625068e-05,
1937
+ "loss": 0.5728,
1938
  "step": 3110
1939
  },
1940
  {
1941
  "epoch": 7.67,
1942
  "learning_rate": 1.2862616310892173e-05,
1943
+ "loss": 0.5112,
1944
  "step": 3120
1945
  },
1946
  {
1947
  "epoch": 7.7,
1948
  "learning_rate": 1.2725779967159279e-05,
1949
+ "loss": 0.5435,
1950
  "step": 3130
1951
  },
1952
  {
1953
  "epoch": 7.72,
1954
  "learning_rate": 1.2588943623426383e-05,
1955
+ "loss": 0.5792,
1956
  "step": 3140
1957
  },
1958
  {
1959
  "epoch": 7.74,
1960
  "learning_rate": 1.2452107279693487e-05,
1961
+ "loss": 0.5114,
1962
  "step": 3150
1963
  },
1964
  {
1965
  "epoch": 7.77,
1966
  "learning_rate": 1.2315270935960592e-05,
1967
+ "loss": 0.5976,
1968
  "step": 3160
1969
  },
1970
  {
1971
  "epoch": 7.79,
1972
  "learning_rate": 1.2178434592227696e-05,
1973
+ "loss": 0.5955,
1974
  "step": 3170
1975
  },
1976
  {
1977
  "epoch": 7.82,
1978
  "learning_rate": 1.20415982484948e-05,
1979
+ "loss": 0.5476,
1980
  "step": 3180
1981
  },
1982
  {
1983
  "epoch": 7.84,
1984
  "learning_rate": 1.1904761904761905e-05,
1985
+ "loss": 0.5329,
1986
  "step": 3190
1987
  },
1988
  {
1989
  "epoch": 7.87,
1990
  "learning_rate": 1.176792556102901e-05,
1991
+ "loss": 0.5765,
1992
  "step": 3200
1993
  },
1994
  {
1995
  "epoch": 7.89,
1996
  "learning_rate": 1.1631089217296116e-05,
1997
+ "loss": 0.6101,
1998
  "step": 3210
1999
  },
2000
  {
2001
  "epoch": 7.92,
2002
  "learning_rate": 1.1494252873563218e-05,
2003
+ "loss": 0.673,
2004
  "step": 3220
2005
  },
2006
  {
2007
  "epoch": 7.94,
2008
  "learning_rate": 1.1357416529830324e-05,
2009
+ "loss": 0.4893,
2010
  "step": 3230
2011
  },
2012
  {
2013
  "epoch": 7.97,
2014
  "learning_rate": 1.1220580186097427e-05,
2015
+ "loss": 0.4964,
2016
  "step": 3240
2017
  },
2018
  {
2019
  "epoch": 7.99,
2020
  "learning_rate": 1.1083743842364533e-05,
2021
+ "loss": 0.5653,
2022
  "step": 3250
2023
  },
2024
  {
2025
  "epoch": 8.0,
2026
+ "eval_accuracy": 0.9652549697493518,
2027
+ "eval_loss": 0.18624082207679749,
2028
+ "eval_runtime": 27.8689,
2029
+ "eval_samples_per_second": 207.579,
2030
+ "eval_steps_per_second": 6.495,
2031
  "step": 3254
2032
  },
2033
  {
2034
  "epoch": 8.01,
2035
  "learning_rate": 1.0946907498631636e-05,
2036
+ "loss": 0.5571,
2037
  "step": 3260
2038
  },
2039
  {
2040
  "epoch": 8.04,
2041
  "learning_rate": 1.0810071154898742e-05,
2042
+ "loss": 0.5585,
2043
  "step": 3270
2044
  },
2045
  {
2046
  "epoch": 8.06,
2047
  "learning_rate": 1.0673234811165846e-05,
2048
+ "loss": 0.5709,
2049
  "step": 3280
2050
  },
2051
  {
2052
  "epoch": 8.09,
2053
  "learning_rate": 1.053639846743295e-05,
2054
+ "loss": 0.5892,
2055
  "step": 3290
2056
  },
2057
  {
2058
  "epoch": 8.11,
2059
  "learning_rate": 1.0399562123700055e-05,
2060
+ "loss": 0.5463,
2061
  "step": 3300
2062
  },
2063
  {
2064
  "epoch": 8.14,
2065
  "learning_rate": 1.026272577996716e-05,
2066
+ "loss": 0.5466,
2067
  "step": 3310
2068
  },
2069
  {
2070
  "epoch": 8.16,
2071
  "learning_rate": 1.0125889436234266e-05,
2072
+ "loss": 0.5939,
2073
  "step": 3320
2074
  },
2075
  {
2076
  "epoch": 8.19,
2077
  "learning_rate": 9.989053092501368e-06,
2078
+ "loss": 0.5314,
2079
  "step": 3330
2080
  },
2081
  {
2082
  "epoch": 8.21,
2083
  "learning_rate": 9.852216748768475e-06,
2084
+ "loss": 0.4986,
2085
  "step": 3340
2086
  },
2087
  {
2088
  "epoch": 8.24,
2089
  "learning_rate": 9.715380405035577e-06,
2090
+ "loss": 0.622,
2091
  "step": 3350
2092
  },
2093
  {
2094
  "epoch": 8.26,
2095
  "learning_rate": 9.578544061302683e-06,
2096
+ "loss": 0.5836,
2097
  "step": 3360
2098
  },
2099
  {
2100
  "epoch": 8.29,
2101
  "learning_rate": 9.441707717569786e-06,
2102
+ "loss": 0.5582,
2103
  "step": 3370
2104
  },
2105
  {
2106
  "epoch": 8.31,
2107
  "learning_rate": 9.304871373836892e-06,
2108
+ "loss": 0.5248,
2109
  "step": 3380
2110
  },
2111
  {
2112
  "epoch": 8.33,
2113
  "learning_rate": 9.168035030103995e-06,
2114
+ "loss": 0.5676,
2115
  "step": 3390
2116
  },
2117
  {
2118
  "epoch": 8.36,
2119
  "learning_rate": 9.031198686371101e-06,
2120
+ "loss": 0.5659,
2121
  "step": 3400
2122
  },
2123
  {
2124
  "epoch": 8.38,
2125
  "learning_rate": 8.894362342638204e-06,
2126
+ "loss": 0.5762,
2127
  "step": 3410
2128
  },
2129
  {
2130
  "epoch": 8.41,
2131
  "learning_rate": 8.75752599890531e-06,
2132
+ "loss": 0.5365,
2133
  "step": 3420
2134
  },
2135
  {
2136
  "epoch": 8.43,
2137
  "learning_rate": 8.620689655172414e-06,
2138
+ "loss": 0.5394,
2139
  "step": 3430
2140
  },
2141
  {
2142
  "epoch": 8.46,
2143
  "learning_rate": 8.483853311439519e-06,
2144
+ "loss": 0.5327,
2145
  "step": 3440
2146
  },
2147
  {
2148
  "epoch": 8.48,
2149
  "learning_rate": 8.347016967706623e-06,
2150
+ "loss": 0.5503,
2151
  "step": 3450
2152
  },
2153
  {
2154
  "epoch": 8.51,
2155
  "learning_rate": 8.210180623973727e-06,
2156
+ "loss": 0.4698,
2157
  "step": 3460
2158
  },
2159
  {
2160
  "epoch": 8.53,
2161
  "learning_rate": 8.073344280240833e-06,
2162
+ "loss": 0.5264,
2163
  "step": 3470
2164
  },
2165
  {
2166
  "epoch": 8.56,
2167
  "learning_rate": 7.936507936507936e-06,
2168
+ "loss": 0.5127,
2169
  "step": 3480
2170
  },
2171
  {
2172
  "epoch": 8.58,
2173
  "learning_rate": 7.799671592775042e-06,
2174
+ "loss": 0.5655,
2175
  "step": 3490
2176
  },
2177
  {
2178
  "epoch": 8.6,
2179
  "learning_rate": 7.662835249042145e-06,
2180
+ "loss": 0.5611,
2181
  "step": 3500
2182
  },
2183
  {
2184
  "epoch": 8.63,
2185
  "learning_rate": 7.52599890530925e-06,
2186
+ "loss": 0.5306,
2187
  "step": 3510
2188
  },
2189
  {
2190
  "epoch": 8.65,
2191
  "learning_rate": 7.3891625615763555e-06,
2192
+ "loss": 0.531,
2193
  "step": 3520
2194
  },
2195
  {
2196
  "epoch": 8.68,
2197
  "learning_rate": 7.25232621784346e-06,
2198
+ "loss": 0.5006,
2199
  "step": 3530
2200
  },
2201
  {
2202
  "epoch": 8.7,
2203
  "learning_rate": 7.115489874110564e-06,
2204
+ "loss": 0.5036,
2205
  "step": 3540
2206
  },
2207
  {
2208
  "epoch": 8.73,
2209
  "learning_rate": 6.978653530377669e-06,
2210
+ "loss": 0.5519,
2211
  "step": 3550
2212
  },
2213
  {
2214
  "epoch": 8.75,
2215
  "learning_rate": 6.841817186644774e-06,
2216
+ "loss": 0.5233,
2217
  "step": 3560
2218
  },
2219
  {
2220
  "epoch": 8.78,
2221
  "learning_rate": 6.7049808429118775e-06,
2222
+ "loss": 0.567,
2223
  "step": 3570
2224
  },
2225
  {
2226
  "epoch": 8.8,
2227
  "learning_rate": 6.568144499178983e-06,
2228
+ "loss": 0.4994,
2229
  "step": 3580
2230
  },
2231
  {
2232
  "epoch": 8.83,
2233
  "learning_rate": 6.431308155446086e-06,
2234
+ "loss": 0.5869,
2235
  "step": 3590
2236
  },
2237
  {
2238
  "epoch": 8.85,
2239
  "learning_rate": 6.2944718117131915e-06,
2240
+ "loss": 0.5738,
2241
  "step": 3600
2242
  },
2243
  {
2244
  "epoch": 8.88,
2245
  "learning_rate": 6.157635467980296e-06,
2246
+ "loss": 0.4842,
2247
  "step": 3610
2248
  },
2249
  {
2250
  "epoch": 8.9,
2251
  "learning_rate": 6.0207991242474e-06,
2252
+ "loss": 0.5135,
2253
  "step": 3620
2254
  },
2255
  {
2256
  "epoch": 8.92,
2257
  "learning_rate": 5.883962780514505e-06,
2258
+ "loss": 0.5603,
2259
  "step": 3630
2260
  },
2261
  {
2262
  "epoch": 8.95,
2263
  "learning_rate": 5.747126436781609e-06,
2264
+ "loss": 0.5068,
2265
  "step": 3640
2266
  },
2267
  {
2268
  "epoch": 8.97,
2269
  "learning_rate": 5.6102900930487136e-06,
2270
+ "loss": 0.5566,
2271
  "step": 3650
2272
  },
2273
  {
2274
  "epoch": 9.0,
2275
  "learning_rate": 5.473453749315818e-06,
2276
+ "loss": 0.5361,
2277
  "step": 3660
2278
  },
2279
  {
2280
  "epoch": 9.0,
2281
+ "eval_accuracy": 0.9659464131374244,
2282
+ "eval_loss": 0.18145236372947693,
2283
+ "eval_runtime": 28.6712,
2284
+ "eval_samples_per_second": 201.771,
2285
+ "eval_steps_per_second": 6.313,
2286
  "step": 3660
2287
  },
2288
  {
2289
  "epoch": 9.02,
2290
  "learning_rate": 5.336617405582923e-06,
2291
+ "loss": 0.5322,
2292
  "step": 3670
2293
  },
2294
  {
2295
  "epoch": 9.05,
2296
  "learning_rate": 5.199781061850028e-06,
2297
+ "loss": 0.5509,
2298
  "step": 3680
2299
  },
2300
  {
2301
  "epoch": 9.07,
2302
  "learning_rate": 5.062944718117133e-06,
2303
+ "loss": 0.5806,
2304
  "step": 3690
2305
  },
2306
  {
2307
  "epoch": 9.1,
2308
  "learning_rate": 4.926108374384237e-06,
2309
+ "loss": 0.4942,
2310
  "step": 3700
2311
  },
2312
  {
2313
  "epoch": 9.12,
2314
  "learning_rate": 4.789272030651342e-06,
2315
+ "loss": 0.5086,
2316
  "step": 3710
2317
  },
2318
  {
2319
  "epoch": 9.15,
2320
  "learning_rate": 4.652435686918446e-06,
2321
+ "loss": 0.5668,
2322
  "step": 3720
2323
  },
2324
  {
2325
  "epoch": 9.17,
2326
  "learning_rate": 4.5155993431855505e-06,
2327
+ "loss": 0.5554,
2328
  "step": 3730
2329
  },
2330
  {
2331
  "epoch": 9.19,
2332
  "learning_rate": 4.378762999452655e-06,
2333
+ "loss": 0.5122,
2334
  "step": 3740
2335
  },
2336
  {
2337
  "epoch": 9.22,
2338
  "learning_rate": 4.241926655719759e-06,
2339
+ "loss": 0.5301,
2340
  "step": 3750
2341
  },
2342
  {
2343
  "epoch": 9.24,
2344
  "learning_rate": 4.105090311986864e-06,
2345
+ "loss": 0.5617,
2346
  "step": 3760
2347
  },
2348
  {
2349
  "epoch": 9.27,
2350
  "learning_rate": 3.968253968253968e-06,
2351
+ "loss": 0.5423,
2352
  "step": 3770
2353
  },
2354
  {
2355
  "epoch": 9.29,
2356
  "learning_rate": 3.8314176245210725e-06,
2357
+ "loss": 0.6171,
2358
  "step": 3780
2359
  },
2360
  {
2361
  "epoch": 9.32,
2362
  "learning_rate": 3.6945812807881777e-06,
2363
+ "loss": 0.498,
2364
  "step": 3790
2365
  },
2366
  {
2367
  "epoch": 9.34,
2368
  "learning_rate": 3.557744937055282e-06,
2369
+ "loss": 0.5354,
2370
  "step": 3800
2371
  },
2372
  {
2373
  "epoch": 9.37,
2374
  "learning_rate": 3.420908593322387e-06,
2375
+ "loss": 0.5846,
2376
  "step": 3810
2377
  },
2378
  {
2379
  "epoch": 9.39,
2380
  "learning_rate": 3.2840722495894914e-06,
2381
+ "loss": 0.502,
2382
  "step": 3820
2383
  },
2384
  {
2385
  "epoch": 9.42,
2386
  "learning_rate": 3.1472359058565958e-06,
2387
+ "loss": 0.5138,
2388
  "step": 3830
2389
  },
2390
  {
2391
  "epoch": 9.44,
2392
  "learning_rate": 3.0103995621237e-06,
2393
+ "loss": 0.5014,
2394
  "step": 3840
2395
  },
2396
  {
2397
  "epoch": 9.47,
2398
  "learning_rate": 2.8735632183908046e-06,
2399
+ "loss": 0.4884,
2400
  "step": 3850
2401
  },
2402
  {
2403
  "epoch": 9.49,
2404
  "learning_rate": 2.736726874657909e-06,
2405
+ "loss": 0.6063,
2406
  "step": 3860
2407
  },
2408
  {
2409
  "epoch": 9.51,
2410
  "learning_rate": 2.599890530925014e-06,
2411
+ "loss": 0.5386,
2412
  "step": 3870
2413
  },
2414
  {
2415
  "epoch": 9.54,
2416
  "learning_rate": 2.4630541871921186e-06,
2417
+ "loss": 0.5255,
2418
  "step": 3880
2419
  },
2420
  {
2421
  "epoch": 9.56,
2422
  "learning_rate": 2.326217843459223e-06,
2423
+ "loss": 0.5019,
2424
  "step": 3890
2425
  },
2426
  {
2427
  "epoch": 9.59,
2428
  "learning_rate": 2.1893814997263274e-06,
2429
+ "loss": 0.503,
2430
  "step": 3900
2431
  },
2432
  {
2433
  "epoch": 9.61,
2434
  "learning_rate": 2.052545155993432e-06,
2435
+ "loss": 0.5699,
2436
  "step": 3910
2437
  },
2438
  {
2439
  "epoch": 9.64,
2440
  "learning_rate": 1.9157088122605362e-06,
2441
+ "loss": 0.4565,
2442
  "step": 3920
2443
  },
2444
  {
2445
  "epoch": 9.66,
2446
  "learning_rate": 1.778872468527641e-06,
2447
+ "loss": 0.5098,
2448
  "step": 3930
2449
  },
2450
  {
2451
  "epoch": 9.69,
2452
  "learning_rate": 1.6420361247947457e-06,
2453
+ "loss": 0.5032,
2454
  "step": 3940
2455
  },
2456
  {
2457
  "epoch": 9.71,
2458
  "learning_rate": 1.50519978106185e-06,
2459
+ "loss": 0.4713,
2460
  "step": 3950
2461
  },
2462
  {
2463
  "epoch": 9.74,
2464
  "learning_rate": 1.3683634373289545e-06,
2465
+ "loss": 0.4554,
2466
  "step": 3960
2467
  },
2468
  {
2469
  "epoch": 9.76,
2470
  "learning_rate": 1.2315270935960593e-06,
2471
+ "loss": 0.4773,
2472
  "step": 3970
2473
  },
2474
  {
2475
  "epoch": 9.78,
2476
  "learning_rate": 1.0946907498631637e-06,
2477
+ "loss": 0.5536,
2478
  "step": 3980
2479
  },
2480
  {
2481
  "epoch": 9.81,
2482
  "learning_rate": 9.578544061302681e-07,
2483
+ "loss": 0.4913,
2484
  "step": 3990
2485
  },
2486
  {
2487
  "epoch": 9.83,
2488
  "learning_rate": 8.210180623973728e-07,
2489
+ "loss": 0.5538,
2490
  "step": 4000
2491
  },
2492
  {
2493
  "epoch": 9.86,
2494
  "learning_rate": 6.841817186644772e-07,
2495
+ "loss": 0.4838,
2496
  "step": 4010
2497
  },
2498
  {
2499
  "epoch": 9.88,
2500
  "learning_rate": 5.473453749315819e-07,
2501
+ "loss": 0.569,
2502
  "step": 4020
2503
  },
2504
  {
2505
  "epoch": 9.91,
2506
  "learning_rate": 4.105090311986864e-07,
2507
+ "loss": 0.5847,
2508
  "step": 4030
2509
  },
2510
  {
2511
  "epoch": 9.93,
2512
  "learning_rate": 2.7367268746579093e-07,
2513
+ "loss": 0.5507,
2514
  "step": 4040
2515
  },
2516
  {
2517
  "epoch": 9.96,
2518
  "learning_rate": 1.3683634373289546e-07,
2519
+ "loss": 0.5834,
2520
  "step": 4050
2521
  },
2522
  {
2523
  "epoch": 9.98,
2524
  "learning_rate": 0.0,
2525
+ "loss": 0.5017,
2526
  "step": 4060
2527
  },
2528
  {
2529
  "epoch": 9.98,
2530
+ "eval_accuracy": 0.9676750216076059,
2531
+ "eval_loss": 0.17719660699367523,
2532
+ "eval_runtime": 28.9267,
2533
+ "eval_samples_per_second": 199.989,
2534
+ "eval_steps_per_second": 6.257,
2535
  "step": 4060
2536
  },
2537
  {
2538
  "epoch": 9.98,
2539
  "step": 4060,
2540
  "total_flos": 1.3140619208067262e+19,
2541
+ "train_loss": 0.6519044913681857,
2542
+ "train_runtime": 4257.1599,
2543
+ "train_samples_per_second": 122.29,
2544
+ "train_steps_per_second": 0.954
2545
  }
2546
  ],
2547
  "logging_steps": 10,