nandyc committed on
Commit
7c3e2ac
1 Parent(s): 9609fd8

🍻 cheers

Browse files
Files changed (5) hide show
  1. README.md +3 -2
  2. all_results.json +9 -9
  3. eval_results.json +5 -5
  4. train_results.json +4 -4
  5. trainer_state.json +438 -438
README.md CHANGED
@@ -2,6 +2,7 @@
2
  license: apache-2.0
3
  base_model: microsoft/swin-tiny-patch4-window7-224
4
  tags:
 
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
@@ -15,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # swin-tiny-patch4-window7-224-finetuned_ASL_Isolated_Swin_dataset2
17
 
18
- This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.0909
21
  - Accuracy: 0.9769
22
 
23
  ## Model description
 
2
  license: apache-2.0
3
  base_model: microsoft/swin-tiny-patch4-window7-224
4
  tags:
5
+ - image-classification
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
 
16
 
17
  # swin-tiny-patch4-window7-224-finetuned_ASL_Isolated_Swin_dataset2
18
 
19
+ This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the ASL_Isolated_Swin_dataset dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.1269
22
  - Accuracy: 0.9769
23
 
24
  ## Model description
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9846153846153847,
4
- "eval_loss": 0.05584708973765373,
5
- "eval_runtime": 2.0579,
6
- "eval_samples_per_second": 126.342,
7
- "eval_steps_per_second": 16.036,
8
  "total_flos": 7.30261234607063e+17,
9
- "train_loss": 0.5342570722103119,
10
- "train_runtime": 409.3983,
11
- "train_samples_per_second": 71.715,
12
- "train_steps_per_second": 4.494
13
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.9769230769230769,
4
+ "eval_loss": 0.12689848244190216,
5
+ "eval_runtime": 1.9212,
6
+ "eval_samples_per_second": 135.332,
7
+ "eval_steps_per_second": 17.177,
8
  "total_flos": 7.30261234607063e+17,
9
+ "train_loss": 0.5132673807766126,
10
+ "train_runtime": 395.7262,
11
+ "train_samples_per_second": 74.193,
12
+ "train_steps_per_second": 4.65
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9846153846153847,
4
- "eval_loss": 0.05584708973765373,
5
- "eval_runtime": 2.0579,
6
- "eval_samples_per_second": 126.342,
7
- "eval_steps_per_second": 16.036
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.9769230769230769,
4
+ "eval_loss": 0.12689848244190216,
5
+ "eval_runtime": 1.9212,
6
+ "eval_samples_per_second": 135.332,
7
+ "eval_steps_per_second": 17.177
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 7.30261234607063e+17,
4
- "train_loss": 0.5342570722103119,
5
- "train_runtime": 409.3983,
6
- "train_samples_per_second": 71.715,
7
- "train_steps_per_second": 4.494
8
  }
 
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 7.30261234607063e+17,
4
+ "train_loss": 0.5132673807766126,
5
+ "train_runtime": 395.7262,
6
+ "train_samples_per_second": 74.193,
7
+ "train_steps_per_second": 4.65
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9846153846153847,
3
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned_ASL_Isolated_Swin_dataset2/checkpoint-1700",
4
  "epoch": 20.0,
5
  "eval_steps": 100,
@@ -10,1278 +10,1278 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.11,
13
- "learning_rate": 0.00019891304347826087,
14
- "loss": 3.3365,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.22,
19
- "learning_rate": 0.00019782608695652175,
20
- "loss": 3.2188,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.33,
25
- "learning_rate": 0.00019673913043478263,
26
- "loss": 3.1756,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.43,
31
- "learning_rate": 0.0001956521739130435,
32
- "loss": 2.9023,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.54,
37
- "learning_rate": 0.00019467391304347825,
38
- "loss": 2.5517,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.65,
43
- "learning_rate": 0.00019358695652173916,
44
- "loss": 2.4131,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.76,
49
- "learning_rate": 0.00019250000000000002,
50
- "loss": 2.0957,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.87,
55
- "learning_rate": 0.00019141304347826087,
56
- "loss": 1.8856,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.98,
61
- "learning_rate": 0.00019032608695652176,
62
- "loss": 1.7217,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 1.09,
67
- "learning_rate": 0.0001892391304347826,
68
- "loss": 1.6758,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 1.09,
73
- "eval_accuracy": 0.5769230769230769,
74
- "eval_loss": 1.320558786392212,
75
- "eval_runtime": 2.846,
76
- "eval_samples_per_second": 91.356,
77
- "eval_steps_per_second": 11.595,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 1.2,
82
- "learning_rate": 0.0001881521739130435,
83
- "loss": 1.4695,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 1.3,
88
- "learning_rate": 0.00018706521739130435,
89
- "loss": 1.311,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 1.41,
94
- "learning_rate": 0.00018597826086956523,
95
- "loss": 1.2741,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 1.52,
100
- "learning_rate": 0.0001848913043478261,
101
- "loss": 1.3444,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 1.63,
106
- "learning_rate": 0.00018380434782608697,
107
- "loss": 1.0445,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 1.74,
112
  "learning_rate": 0.00018282608695652174,
113
- "loss": 1.0316,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 1.85,
118
  "learning_rate": 0.00018173913043478262,
119
- "loss": 1.0877,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 1.96,
124
  "learning_rate": 0.00018065217391304348,
125
- "loss": 1.1561,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 2.07,
130
  "learning_rate": 0.00017956521739130436,
131
- "loss": 1.0726,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 2.17,
136
  "learning_rate": 0.00017847826086956522,
137
- "loss": 1.0717,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 2.17,
142
- "eval_accuracy": 0.8153846153846154,
143
- "eval_loss": 0.6482219696044922,
144
- "eval_runtime": 1.825,
145
- "eval_samples_per_second": 142.465,
146
- "eval_steps_per_second": 18.082,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 2.28,
151
  "learning_rate": 0.0001773913043478261,
152
- "loss": 0.744,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 2.39,
157
  "learning_rate": 0.00017630434782608695,
158
- "loss": 0.9959,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 2.5,
163
  "learning_rate": 0.00017521739130434784,
164
- "loss": 0.9342,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 2.61,
169
  "learning_rate": 0.00017413043478260872,
170
- "loss": 0.8275,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 2.72,
175
  "learning_rate": 0.00017304347826086958,
176
- "loss": 0.8388,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 2.83,
181
  "learning_rate": 0.00017195652173913043,
182
- "loss": 0.8996,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 2.93,
187
  "learning_rate": 0.00017086956521739132,
188
- "loss": 0.6616,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 3.04,
193
  "learning_rate": 0.0001697826086956522,
194
- "loss": 0.7714,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 3.15,
199
  "learning_rate": 0.00016869565217391306,
200
- "loss": 0.7296,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 3.26,
205
  "learning_rate": 0.0001676086956521739,
206
- "loss": 0.627,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 3.26,
211
- "eval_accuracy": 0.8653846153846154,
212
- "eval_loss": 0.4485549330711365,
213
- "eval_runtime": 2.5537,
214
- "eval_samples_per_second": 101.814,
215
- "eval_steps_per_second": 12.923,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 3.37,
220
- "learning_rate": 0.0001666304347826087,
221
- "loss": 0.8732,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 3.48,
226
- "learning_rate": 0.00016554347826086958,
227
- "loss": 0.8933,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 3.59,
232
- "learning_rate": 0.00016445652173913044,
233
- "loss": 0.8468,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 3.7,
238
- "learning_rate": 0.0001633695652173913,
239
- "loss": 0.6468,
240
  "step": 340
241
  },
242
  {
243
  "epoch": 3.8,
244
- "learning_rate": 0.00016228260869565218,
245
- "loss": 0.7266,
246
  "step": 350
247
  },
248
  {
249
  "epoch": 3.91,
250
- "learning_rate": 0.00016119565217391306,
251
- "loss": 0.8522,
252
  "step": 360
253
  },
254
  {
255
  "epoch": 4.02,
256
- "learning_rate": 0.00016010869565217392,
257
- "loss": 0.5888,
258
  "step": 370
259
  },
260
  {
261
  "epoch": 4.13,
262
- "learning_rate": 0.00015902173913043478,
263
- "loss": 0.685,
264
  "step": 380
265
  },
266
  {
267
  "epoch": 4.24,
268
- "learning_rate": 0.00015793478260869566,
269
- "loss": 0.6937,
270
  "step": 390
271
  },
272
  {
273
  "epoch": 4.35,
274
- "learning_rate": 0.00015684782608695654,
275
- "loss": 0.5397,
276
  "step": 400
277
  },
278
  {
279
  "epoch": 4.35,
280
- "eval_accuracy": 0.8923076923076924,
281
- "eval_loss": 0.2977767884731293,
282
- "eval_runtime": 1.8479,
283
- "eval_samples_per_second": 140.701,
284
- "eval_steps_per_second": 17.858,
285
  "step": 400
286
  },
287
  {
288
  "epoch": 4.46,
289
- "learning_rate": 0.0001557608695652174,
290
- "loss": 0.6058,
291
  "step": 410
292
  },
293
  {
294
  "epoch": 4.57,
295
- "learning_rate": 0.00015467391304347828,
296
- "loss": 0.553,
297
  "step": 420
298
  },
299
  {
300
  "epoch": 4.67,
301
- "learning_rate": 0.00015358695652173914,
302
- "loss": 0.4621,
303
  "step": 430
304
  },
305
  {
306
  "epoch": 4.78,
307
- "learning_rate": 0.0001525,
308
- "loss": 0.5002,
309
  "step": 440
310
  },
311
  {
312
  "epoch": 4.89,
313
- "learning_rate": 0.00015141304347826088,
314
- "loss": 0.4833,
315
  "step": 450
316
  },
317
  {
318
  "epoch": 5.0,
319
- "learning_rate": 0.00015032608695652176,
320
- "loss": 0.5952,
321
  "step": 460
322
  },
323
  {
324
  "epoch": 5.11,
325
- "learning_rate": 0.00014923913043478262,
326
- "loss": 0.5049,
327
  "step": 470
328
  },
329
  {
330
  "epoch": 5.22,
331
- "learning_rate": 0.00014815217391304347,
332
- "loss": 0.5219,
333
  "step": 480
334
  },
335
  {
336
  "epoch": 5.33,
337
- "learning_rate": 0.00014706521739130436,
338
- "loss": 0.6222,
339
  "step": 490
340
  },
341
  {
342
  "epoch": 5.43,
343
- "learning_rate": 0.00014597826086956524,
344
- "loss": 0.537,
345
  "step": 500
346
  },
347
  {
348
  "epoch": 5.43,
349
- "eval_accuracy": 0.9423076923076923,
350
- "eval_loss": 0.151279479265213,
351
- "eval_runtime": 1.8783,
352
- "eval_samples_per_second": 138.42,
353
- "eval_steps_per_second": 17.569,
354
  "step": 500
355
  },
356
  {
357
  "epoch": 5.54,
358
- "learning_rate": 0.0001448913043478261,
359
- "loss": 0.498,
360
  "step": 510
361
  },
362
  {
363
  "epoch": 5.65,
364
- "learning_rate": 0.00014380434782608695,
365
- "loss": 0.5532,
366
  "step": 520
367
  },
368
  {
369
  "epoch": 5.76,
370
- "learning_rate": 0.00014271739130434783,
371
- "loss": 0.5434,
372
  "step": 530
373
  },
374
  {
375
  "epoch": 5.87,
376
- "learning_rate": 0.00014163043478260872,
377
- "loss": 0.6535,
378
  "step": 540
379
  },
380
  {
381
  "epoch": 5.98,
382
- "learning_rate": 0.00014054347826086957,
383
- "loss": 0.4965,
384
  "step": 550
385
  },
386
  {
387
  "epoch": 6.09,
388
- "learning_rate": 0.00013945652173913043,
389
- "loss": 0.6712,
390
  "step": 560
391
  },
392
  {
393
  "epoch": 6.2,
394
- "learning_rate": 0.0001383695652173913,
395
- "loss": 0.5683,
396
  "step": 570
397
  },
398
  {
399
  "epoch": 6.3,
400
- "learning_rate": 0.00013728260869565217,
401
- "loss": 0.3377,
402
  "step": 580
403
  },
404
  {
405
  "epoch": 6.41,
406
- "learning_rate": 0.00013619565217391305,
407
- "loss": 0.482,
408
  "step": 590
409
  },
410
  {
411
  "epoch": 6.52,
412
- "learning_rate": 0.00013510869565217394,
413
- "loss": 0.3766,
414
  "step": 600
415
  },
416
  {
417
  "epoch": 6.52,
418
- "eval_accuracy": 0.8846153846153846,
419
- "eval_loss": 0.4736884534358978,
420
- "eval_runtime": 1.8513,
421
- "eval_samples_per_second": 140.44,
422
- "eval_steps_per_second": 17.825,
423
  "step": 600
424
  },
425
  {
426
  "epoch": 6.63,
427
- "learning_rate": 0.0001340217391304348,
428
- "loss": 0.5068,
429
  "step": 610
430
  },
431
  {
432
  "epoch": 6.74,
433
- "learning_rate": 0.00013293478260869565,
434
- "loss": 0.3019,
435
  "step": 620
436
  },
437
  {
438
  "epoch": 6.85,
439
- "learning_rate": 0.00013184782608695653,
440
- "loss": 0.4852,
441
  "step": 630
442
  },
443
  {
444
  "epoch": 6.96,
445
- "learning_rate": 0.00013076086956521741,
446
- "loss": 0.55,
447
  "step": 640
448
  },
449
  {
450
  "epoch": 7.07,
451
- "learning_rate": 0.00012967391304347827,
452
- "loss": 0.4122,
453
  "step": 650
454
  },
455
  {
456
  "epoch": 7.17,
457
- "learning_rate": 0.00012858695652173913,
458
- "loss": 0.3566,
459
  "step": 660
460
  },
461
  {
462
  "epoch": 7.28,
463
- "learning_rate": 0.0001275,
464
- "loss": 0.4279,
465
  "step": 670
466
  },
467
  {
468
  "epoch": 7.39,
469
- "learning_rate": 0.0001264130434782609,
470
- "loss": 0.3295,
471
  "step": 680
472
  },
473
  {
474
  "epoch": 7.5,
475
- "learning_rate": 0.00012532608695652175,
476
- "loss": 0.5113,
477
  "step": 690
478
  },
479
  {
480
  "epoch": 7.61,
481
- "learning_rate": 0.0001242391304347826,
482
- "loss": 0.3994,
483
  "step": 700
484
  },
485
  {
486
  "epoch": 7.61,
487
- "eval_accuracy": 0.9115384615384615,
488
- "eval_loss": 0.3060305714607239,
489
- "eval_runtime": 1.8463,
490
- "eval_samples_per_second": 140.821,
491
- "eval_steps_per_second": 17.873,
492
  "step": 700
493
  },
494
  {
495
  "epoch": 7.72,
496
- "learning_rate": 0.0001231521739130435,
497
- "loss": 0.5572,
498
  "step": 710
499
  },
500
  {
501
  "epoch": 7.83,
502
- "learning_rate": 0.00012206521739130434,
503
- "loss": 0.2986,
504
  "step": 720
505
  },
506
  {
507
  "epoch": 7.93,
508
- "learning_rate": 0.00012097826086956523,
509
- "loss": 0.5146,
510
  "step": 730
511
  },
512
  {
513
  "epoch": 8.04,
514
- "learning_rate": 0.0001198913043478261,
515
- "loss": 0.4029,
516
  "step": 740
517
  },
518
  {
519
  "epoch": 8.15,
520
- "learning_rate": 0.00011880434782608695,
521
- "loss": 0.3901,
522
  "step": 750
523
  },
524
  {
525
  "epoch": 8.26,
526
- "learning_rate": 0.00011771739130434782,
527
- "loss": 0.5112,
528
  "step": 760
529
  },
530
  {
531
  "epoch": 8.37,
532
- "learning_rate": 0.0001166304347826087,
533
- "loss": 0.2704,
534
  "step": 770
535
  },
536
  {
537
  "epoch": 8.48,
538
- "learning_rate": 0.00011554347826086958,
539
- "loss": 0.3839,
540
  "step": 780
541
  },
542
  {
543
  "epoch": 8.59,
544
- "learning_rate": 0.00011445652173913045,
545
- "loss": 0.4988,
546
  "step": 790
547
  },
548
  {
549
  "epoch": 8.7,
550
- "learning_rate": 0.0001133695652173913,
551
- "loss": 0.2139,
552
  "step": 800
553
  },
554
  {
555
  "epoch": 8.7,
556
- "eval_accuracy": 0.9576923076923077,
557
- "eval_loss": 0.13448870182037354,
558
- "eval_runtime": 1.9169,
559
- "eval_samples_per_second": 135.638,
560
- "eval_steps_per_second": 17.216,
561
  "step": 800
562
  },
563
  {
564
  "epoch": 8.8,
565
- "learning_rate": 0.00011228260869565217,
566
- "loss": 0.2857,
567
  "step": 810
568
  },
569
  {
570
  "epoch": 8.91,
571
- "learning_rate": 0.00011119565217391305,
572
- "loss": 0.2416,
573
  "step": 820
574
  },
575
  {
576
  "epoch": 9.02,
577
- "learning_rate": 0.00011010869565217392,
578
- "loss": 0.4803,
579
  "step": 830
580
  },
581
  {
582
  "epoch": 9.13,
583
- "learning_rate": 0.00010902173913043478,
584
- "loss": 0.4416,
585
  "step": 840
586
  },
587
  {
588
  "epoch": 9.24,
589
- "learning_rate": 0.00010793478260869565,
590
- "loss": 0.3271,
591
  "step": 850
592
  },
593
  {
594
  "epoch": 9.35,
595
- "learning_rate": 0.00010684782608695653,
596
- "loss": 0.2261,
597
  "step": 860
598
  },
599
  {
600
  "epoch": 9.46,
601
- "learning_rate": 0.0001057608695652174,
602
- "loss": 0.2063,
603
  "step": 870
604
  },
605
  {
606
  "epoch": 9.57,
607
- "learning_rate": 0.00010467391304347827,
608
- "loss": 0.4865,
609
  "step": 880
610
  },
611
  {
612
  "epoch": 9.67,
613
- "learning_rate": 0.00010358695652173913,
614
- "loss": 0.4903,
615
  "step": 890
616
  },
617
  {
618
  "epoch": 9.78,
619
- "learning_rate": 0.0001025,
620
- "loss": 0.2995,
621
  "step": 900
622
  },
623
  {
624
  "epoch": 9.78,
625
- "eval_accuracy": 0.95,
626
- "eval_loss": 0.155814990401268,
627
- "eval_runtime": 1.8528,
628
- "eval_samples_per_second": 140.328,
629
- "eval_steps_per_second": 17.811,
630
  "step": 900
631
  },
632
  {
633
  "epoch": 9.89,
634
- "learning_rate": 0.00010141304347826088,
635
- "loss": 0.2847,
636
  "step": 910
637
  },
638
  {
639
  "epoch": 10.0,
640
- "learning_rate": 0.00010032608695652175,
641
- "loss": 0.2913,
642
  "step": 920
643
  },
644
  {
645
  "epoch": 10.11,
646
- "learning_rate": 9.923913043478261e-05,
647
- "loss": 0.2779,
648
  "step": 930
649
  },
650
  {
651
  "epoch": 10.22,
652
- "learning_rate": 9.815217391304349e-05,
653
- "loss": 0.2205,
654
  "step": 940
655
  },
656
  {
657
  "epoch": 10.33,
658
- "learning_rate": 9.706521739130435e-05,
659
- "loss": 0.405,
660
  "step": 950
661
  },
662
  {
663
  "epoch": 10.43,
664
- "learning_rate": 9.597826086956522e-05,
665
- "loss": 0.3613,
666
  "step": 960
667
  },
668
  {
669
  "epoch": 10.54,
670
- "learning_rate": 9.48913043478261e-05,
671
- "loss": 0.2381,
672
  "step": 970
673
  },
674
  {
675
  "epoch": 10.65,
676
- "learning_rate": 9.380434782608696e-05,
677
- "loss": 0.224,
678
  "step": 980
679
  },
680
  {
681
  "epoch": 10.76,
682
- "learning_rate": 9.271739130434784e-05,
683
- "loss": 0.2544,
684
  "step": 990
685
  },
686
  {
687
  "epoch": 10.87,
688
- "learning_rate": 9.16304347826087e-05,
689
- "loss": 0.2835,
690
  "step": 1000
691
  },
692
  {
693
  "epoch": 10.87,
694
- "eval_accuracy": 0.9730769230769231,
695
- "eval_loss": 0.09426813572645187,
696
- "eval_runtime": 2.2351,
697
- "eval_samples_per_second": 116.327,
698
- "eval_steps_per_second": 14.765,
699
  "step": 1000
700
  },
701
  {
702
  "epoch": 10.98,
703
- "learning_rate": 9.054347826086958e-05,
704
- "loss": 0.4605,
705
  "step": 1010
706
  },
707
  {
708
  "epoch": 11.09,
709
- "learning_rate": 8.945652173913043e-05,
710
- "loss": 0.2259,
711
  "step": 1020
712
  },
713
  {
714
  "epoch": 11.2,
715
- "learning_rate": 8.83695652173913e-05,
716
- "loss": 0.2241,
717
  "step": 1030
718
  },
719
  {
720
  "epoch": 11.3,
721
- "learning_rate": 8.728260869565217e-05,
722
- "loss": 0.2173,
723
  "step": 1040
724
  },
725
  {
726
  "epoch": 11.41,
727
- "learning_rate": 8.619565217391304e-05,
728
- "loss": 0.3629,
729
  "step": 1050
730
  },
731
  {
732
  "epoch": 11.52,
733
- "learning_rate": 8.510869565217393e-05,
734
- "loss": 0.6198,
735
  "step": 1060
736
  },
737
  {
738
  "epoch": 11.63,
739
- "learning_rate": 8.402173913043478e-05,
740
- "loss": 0.5369,
741
  "step": 1070
742
  },
743
  {
744
  "epoch": 11.74,
745
- "learning_rate": 8.293478260869567e-05,
746
- "loss": 0.304,
747
  "step": 1080
748
  },
749
  {
750
  "epoch": 11.85,
751
- "learning_rate": 8.184782608695652e-05,
752
- "loss": 0.2756,
753
  "step": 1090
754
  },
755
  {
756
  "epoch": 11.96,
757
  "learning_rate": 8.076086956521739e-05,
758
- "loss": 0.3089,
759
  "step": 1100
760
  },
761
  {
762
  "epoch": 11.96,
763
- "eval_accuracy": 0.9576923076923077,
764
- "eval_loss": 0.09128668904304504,
765
- "eval_runtime": 1.8693,
766
- "eval_samples_per_second": 139.093,
767
- "eval_steps_per_second": 17.654,
768
  "step": 1100
769
  },
770
  {
771
  "epoch": 12.07,
772
  "learning_rate": 7.967391304347826e-05,
773
- "loss": 0.3491,
774
  "step": 1110
775
  },
776
  {
777
  "epoch": 12.17,
778
  "learning_rate": 7.858695652173913e-05,
779
- "loss": 0.1677,
780
  "step": 1120
781
  },
782
  {
783
  "epoch": 12.28,
784
  "learning_rate": 7.75e-05,
785
- "loss": 0.2795,
786
  "step": 1130
787
  },
788
  {
789
  "epoch": 12.39,
790
  "learning_rate": 7.641304347826087e-05,
791
- "loss": 0.2714,
792
  "step": 1140
793
  },
794
  {
795
  "epoch": 12.5,
796
  "learning_rate": 7.532608695652175e-05,
797
- "loss": 0.302,
798
  "step": 1150
799
  },
800
  {
801
  "epoch": 12.61,
802
  "learning_rate": 7.423913043478261e-05,
803
- "loss": 0.3564,
804
  "step": 1160
805
  },
806
  {
807
  "epoch": 12.72,
808
- "learning_rate": 7.32608695652174e-05,
809
- "loss": 0.3085,
810
  "step": 1170
811
  },
812
  {
813
  "epoch": 12.83,
814
- "learning_rate": 7.217391304347827e-05,
815
- "loss": 0.2515,
816
  "step": 1180
817
  },
818
  {
819
  "epoch": 12.93,
820
- "learning_rate": 7.108695652173914e-05,
821
- "loss": 0.3075,
822
  "step": 1190
823
  },
824
  {
825
  "epoch": 13.04,
826
- "learning_rate": 7e-05,
827
- "loss": 0.3632,
828
  "step": 1200
829
  },
830
  {
831
  "epoch": 13.04,
832
- "eval_accuracy": 0.9692307692307692,
833
- "eval_loss": 0.08875690400600433,
834
- "eval_runtime": 2.7533,
835
- "eval_samples_per_second": 94.431,
836
- "eval_steps_per_second": 11.986,
837
  "step": 1200
838
  },
839
  {
840
  "epoch": 13.15,
841
- "learning_rate": 6.891304347826088e-05,
842
- "loss": 0.3888,
843
  "step": 1210
844
  },
845
  {
846
  "epoch": 13.26,
847
- "learning_rate": 6.782608695652173e-05,
848
- "loss": 0.1683,
849
  "step": 1220
850
  },
851
  {
852
  "epoch": 13.37,
853
- "learning_rate": 6.673913043478262e-05,
854
- "loss": 0.3672,
855
  "step": 1230
856
  },
857
  {
858
  "epoch": 13.48,
859
- "learning_rate": 6.565217391304349e-05,
860
- "loss": 0.335,
861
  "step": 1240
862
  },
863
  {
864
  "epoch": 13.59,
865
- "learning_rate": 6.456521739130436e-05,
866
- "loss": 0.1861,
867
  "step": 1250
868
  },
869
  {
870
  "epoch": 13.7,
871
- "learning_rate": 6.347826086956523e-05,
872
- "loss": 0.3386,
873
  "step": 1260
874
  },
875
  {
876
  "epoch": 13.8,
877
- "learning_rate": 6.239130434782608e-05,
878
- "loss": 0.2222,
879
  "step": 1270
880
  },
881
  {
882
  "epoch": 13.91,
883
- "learning_rate": 6.130434782608696e-05,
884
- "loss": 0.1714,
885
  "step": 1280
886
  },
887
  {
888
  "epoch": 14.02,
889
- "learning_rate": 6.021739130434783e-05,
890
- "loss": 0.2699,
891
  "step": 1290
892
  },
893
  {
894
  "epoch": 14.13,
895
- "learning_rate": 5.9130434782608704e-05,
896
- "loss": 0.327,
897
  "step": 1300
898
  },
899
  {
900
  "epoch": 14.13,
901
- "eval_accuracy": 0.9807692307692307,
902
- "eval_loss": 0.10383553802967072,
903
- "eval_runtime": 1.9077,
904
- "eval_samples_per_second": 136.293,
905
- "eval_steps_per_second": 17.299,
906
  "step": 1300
907
  },
908
  {
909
  "epoch": 14.24,
910
- "learning_rate": 5.804347826086957e-05,
911
- "loss": 0.224,
912
  "step": 1310
913
  },
914
  {
915
  "epoch": 14.35,
916
- "learning_rate": 5.695652173913044e-05,
917
- "loss": 0.378,
918
  "step": 1320
919
  },
920
  {
921
  "epoch": 14.46,
922
- "learning_rate": 5.5869565217391306e-05,
923
- "loss": 0.2503,
924
  "step": 1330
925
  },
926
  {
927
  "epoch": 14.57,
928
- "learning_rate": 5.478260869565217e-05,
929
- "loss": 0.1633,
930
  "step": 1340
931
  },
932
  {
933
  "epoch": 14.67,
934
- "learning_rate": 5.3695652173913046e-05,
935
- "loss": 0.2973,
936
  "step": 1350
937
  },
938
  {
939
  "epoch": 14.78,
940
- "learning_rate": 5.260869565217391e-05,
941
- "loss": 0.1887,
942
  "step": 1360
943
  },
944
  {
945
  "epoch": 14.89,
946
- "learning_rate": 5.1521739130434785e-05,
947
- "loss": 0.2953,
948
  "step": 1370
949
  },
950
  {
951
  "epoch": 15.0,
952
- "learning_rate": 5.0434782608695655e-05,
953
- "loss": 0.248,
954
  "step": 1380
955
  },
956
  {
957
  "epoch": 15.11,
958
- "learning_rate": 4.9347826086956524e-05,
959
- "loss": 0.152,
960
  "step": 1390
961
  },
962
  {
963
  "epoch": 15.22,
964
- "learning_rate": 4.8260869565217394e-05,
965
- "loss": 0.313,
966
  "step": 1400
967
  },
968
  {
969
  "epoch": 15.22,
970
  "eval_accuracy": 0.9730769230769231,
971
- "eval_loss": 0.09758734703063965,
972
- "eval_runtime": 2.57,
973
- "eval_samples_per_second": 101.167,
974
- "eval_steps_per_second": 12.84,
975
  "step": 1400
976
  },
977
  {
978
  "epoch": 15.33,
979
- "learning_rate": 4.7173913043478264e-05,
980
- "loss": 0.2301,
981
  "step": 1410
982
  },
983
  {
984
  "epoch": 15.43,
985
- "learning_rate": 4.608695652173913e-05,
986
- "loss": 0.1671,
987
  "step": 1420
988
  },
989
  {
990
  "epoch": 15.54,
991
- "learning_rate": 4.5e-05,
992
- "loss": 0.2399,
993
  "step": 1430
994
  },
995
  {
996
  "epoch": 15.65,
997
- "learning_rate": 4.391304347826087e-05,
998
- "loss": 0.278,
999
  "step": 1440
1000
  },
1001
  {
1002
  "epoch": 15.76,
1003
- "learning_rate": 4.282608695652174e-05,
1004
- "loss": 0.3062,
1005
  "step": 1450
1006
  },
1007
  {
1008
  "epoch": 15.87,
1009
- "learning_rate": 4.1739130434782605e-05,
1010
- "loss": 0.3704,
1011
  "step": 1460
1012
  },
1013
  {
1014
  "epoch": 15.98,
1015
- "learning_rate": 4.065217391304348e-05,
1016
- "loss": 0.231,
1017
  "step": 1470
1018
  },
1019
  {
1020
  "epoch": 16.09,
1021
- "learning_rate": 3.956521739130435e-05,
1022
- "loss": 0.2339,
1023
  "step": 1480
1024
  },
1025
  {
1026
  "epoch": 16.2,
1027
- "learning_rate": 3.847826086956522e-05,
1028
- "loss": 0.2079,
1029
  "step": 1490
1030
  },
1031
  {
1032
  "epoch": 16.3,
1033
- "learning_rate": 3.739130434782609e-05,
1034
- "loss": 0.1752,
1035
  "step": 1500
1036
  },
1037
  {
1038
  "epoch": 16.3,
1039
- "eval_accuracy": 0.9807692307692307,
1040
- "eval_loss": 0.050352372229099274,
1041
- "eval_runtime": 1.9714,
1042
- "eval_samples_per_second": 131.889,
1043
- "eval_steps_per_second": 16.74,
1044
  "step": 1500
1045
  },
1046
  {
1047
  "epoch": 16.41,
1048
- "learning_rate": 3.630434782608696e-05,
1049
- "loss": 0.1359,
1050
  "step": 1510
1051
  },
1052
  {
1053
  "epoch": 16.52,
1054
- "learning_rate": 3.521739130434783e-05,
1055
- "loss": 0.1186,
1056
  "step": 1520
1057
  },
1058
  {
1059
  "epoch": 16.63,
1060
- "learning_rate": 3.413043478260869e-05,
1061
- "loss": 0.2223,
1062
  "step": 1530
1063
  },
1064
  {
1065
  "epoch": 16.74,
1066
- "learning_rate": 3.304347826086956e-05,
1067
- "loss": 0.3185,
1068
  "step": 1540
1069
  },
1070
  {
1071
  "epoch": 16.85,
1072
- "learning_rate": 3.195652173913043e-05,
1073
- "loss": 0.1554,
1074
  "step": 1550
1075
  },
1076
  {
1077
  "epoch": 16.96,
1078
- "learning_rate": 3.086956521739131e-05,
1079
- "loss": 0.2177,
1080
  "step": 1560
1081
  },
1082
  {
1083
  "epoch": 17.07,
1084
- "learning_rate": 2.9782608695652175e-05,
1085
- "loss": 0.167,
1086
  "step": 1570
1087
  },
1088
  {
1089
  "epoch": 17.17,
1090
- "learning_rate": 2.8695652173913044e-05,
1091
- "loss": 0.216,
1092
  "step": 1580
1093
  },
1094
  {
1095
  "epoch": 17.28,
1096
- "learning_rate": 2.7608695652173917e-05,
1097
- "loss": 0.2664,
1098
  "step": 1590
1099
  },
1100
  {
1101
  "epoch": 17.39,
1102
- "learning_rate": 2.6521739130434787e-05,
1103
- "loss": 0.2397,
1104
  "step": 1600
1105
  },
1106
  {
1107
  "epoch": 17.39,
1108
- "eval_accuracy": 0.9807692307692307,
1109
- "eval_loss": 0.06116783991456032,
1110
- "eval_runtime": 1.9643,
1111
- "eval_samples_per_second": 132.362,
1112
- "eval_steps_per_second": 16.8,
1113
  "step": 1600
1114
  },
1115
  {
1116
  "epoch": 17.5,
1117
- "learning_rate": 2.543478260869565e-05,
1118
- "loss": 0.1432,
1119
  "step": 1610
1120
  },
1121
  {
1122
  "epoch": 17.61,
1123
- "learning_rate": 2.4347826086956523e-05,
1124
- "loss": 0.2176,
1125
  "step": 1620
1126
  },
1127
  {
1128
  "epoch": 17.72,
1129
- "learning_rate": 2.3260869565217393e-05,
1130
- "loss": 0.296,
1131
  "step": 1630
1132
  },
1133
  {
1134
  "epoch": 17.83,
1135
- "learning_rate": 2.2173913043478262e-05,
1136
- "loss": 0.2308,
1137
  "step": 1640
1138
  },
1139
  {
1140
  "epoch": 17.93,
1141
- "learning_rate": 2.1086956521739132e-05,
1142
- "loss": 0.1676,
1143
  "step": 1650
1144
  },
1145
  {
1146
  "epoch": 18.04,
1147
- "learning_rate": 2e-05,
1148
- "loss": 0.1274,
1149
  "step": 1660
1150
  },
1151
  {
1152
  "epoch": 18.15,
1153
- "learning_rate": 1.8913043478260868e-05,
1154
- "loss": 0.179,
1155
  "step": 1670
1156
  },
1157
  {
1158
  "epoch": 18.26,
1159
- "learning_rate": 1.782608695652174e-05,
1160
- "loss": 0.1569,
1161
  "step": 1680
1162
  },
1163
  {
1164
  "epoch": 18.37,
1165
- "learning_rate": 1.673913043478261e-05,
1166
  "loss": 0.1897,
1167
  "step": 1690
1168
  },
1169
  {
1170
  "epoch": 18.48,
1171
- "learning_rate": 1.565217391304348e-05,
1172
- "loss": 0.1348,
1173
  "step": 1700
1174
  },
1175
  {
1176
  "epoch": 18.48,
1177
- "eval_accuracy": 0.9846153846153847,
1178
- "eval_loss": 0.05584708973765373,
1179
- "eval_runtime": 1.9494,
1180
- "eval_samples_per_second": 133.373,
1181
- "eval_steps_per_second": 16.928,
1182
  "step": 1700
1183
  },
1184
  {
1185
  "epoch": 18.59,
1186
- "learning_rate": 1.4565217391304348e-05,
1187
- "loss": 0.1662,
1188
  "step": 1710
1189
  },
1190
  {
1191
  "epoch": 18.7,
1192
- "learning_rate": 1.3478260869565218e-05,
1193
- "loss": 0.1644,
1194
  "step": 1720
1195
  },
1196
  {
1197
  "epoch": 18.8,
1198
- "learning_rate": 1.2391304347826088e-05,
1199
- "loss": 0.2025,
1200
  "step": 1730
1201
  },
1202
  {
1203
  "epoch": 18.91,
1204
- "learning_rate": 1.1304347826086957e-05,
1205
- "loss": 0.1745,
1206
  "step": 1740
1207
  },
1208
  {
1209
  "epoch": 19.02,
1210
- "learning_rate": 1.0217391304347827e-05,
1211
- "loss": 0.1786,
1212
  "step": 1750
1213
  },
1214
  {
1215
  "epoch": 19.13,
1216
- "learning_rate": 9.130434782608697e-06,
1217
- "loss": 0.2791,
1218
  "step": 1760
1219
  },
1220
  {
1221
  "epoch": 19.24,
1222
- "learning_rate": 8.043478260869565e-06,
1223
- "loss": 0.2056,
1224
  "step": 1770
1225
  },
1226
  {
1227
  "epoch": 19.35,
1228
- "learning_rate": 6.956521739130435e-06,
1229
- "loss": 0.2616,
1230
  "step": 1780
1231
  },
1232
  {
1233
  "epoch": 19.46,
1234
- "learning_rate": 5.869565217391305e-06,
1235
- "loss": 0.1488,
1236
  "step": 1790
1237
  },
1238
  {
1239
  "epoch": 19.57,
1240
- "learning_rate": 4.782608695652174e-06,
1241
- "loss": 0.2842,
1242
  "step": 1800
1243
  },
1244
  {
1245
  "epoch": 19.57,
1246
  "eval_accuracy": 0.9769230769230769,
1247
- "eval_loss": 0.05035410821437836,
1248
- "eval_runtime": 1.9009,
1249
- "eval_samples_per_second": 136.776,
1250
- "eval_steps_per_second": 17.36,
1251
  "step": 1800
1252
  },
1253
  {
1254
  "epoch": 19.67,
1255
- "learning_rate": 3.695652173913044e-06,
1256
- "loss": 0.1487,
1257
  "step": 1810
1258
  },
1259
  {
1260
  "epoch": 19.78,
1261
- "learning_rate": 2.608695652173913e-06,
1262
- "loss": 0.1818,
1263
  "step": 1820
1264
  },
1265
  {
1266
  "epoch": 19.89,
1267
- "learning_rate": 1.5217391304347827e-06,
1268
- "loss": 0.1469,
1269
  "step": 1830
1270
  },
1271
  {
1272
  "epoch": 20.0,
1273
- "learning_rate": 4.347826086956522e-07,
1274
- "loss": 0.213,
1275
  "step": 1840
1276
  },
1277
  {
1278
  "epoch": 20.0,
1279
  "step": 1840,
1280
  "total_flos": 7.30261234607063e+17,
1281
- "train_loss": 0.5342570722103119,
1282
- "train_runtime": 409.3983,
1283
- "train_samples_per_second": 71.715,
1284
- "train_steps_per_second": 4.494
1285
  }
1286
  ],
1287
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.9769230769230769,
3
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned_ASL_Isolated_Swin_dataset2/checkpoint-1700",
4
  "epoch": 20.0,
5
  "eval_steps": 100,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.11,
13
+ "learning_rate": 0.0001991304347826087,
14
+ "loss": 3.3413,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.22,
19
+ "learning_rate": 0.00019804347826086956,
20
+ "loss": 3.1778,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.33,
25
+ "learning_rate": 0.00019695652173913044,
26
+ "loss": 3.1311,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.43,
31
+ "learning_rate": 0.0001958695652173913,
32
+ "loss": 2.9107,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.54,
37
+ "learning_rate": 0.00019478260869565218,
38
+ "loss": 2.3763,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.65,
43
+ "learning_rate": 0.00019369565217391307,
44
+ "loss": 2.0376,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.76,
49
+ "learning_rate": 0.00019260869565217392,
50
+ "loss": 1.9444,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.87,
55
+ "learning_rate": 0.00019152173913043478,
56
+ "loss": 1.8087,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.98,
61
+ "learning_rate": 0.00019043478260869566,
62
+ "loss": 1.6835,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 1.09,
67
+ "learning_rate": 0.00018934782608695655,
68
+ "loss": 1.5439,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 1.09,
73
+ "eval_accuracy": 0.5538461538461539,
74
+ "eval_loss": 1.4188454151153564,
75
+ "eval_runtime": 1.8531,
76
+ "eval_samples_per_second": 140.308,
77
+ "eval_steps_per_second": 17.808,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 1.2,
82
+ "learning_rate": 0.0001882608695652174,
83
+ "loss": 1.3483,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 1.3,
88
+ "learning_rate": 0.00018717391304347826,
89
+ "loss": 1.3837,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 1.41,
94
+ "learning_rate": 0.00018608695652173914,
95
+ "loss": 1.2886,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 1.52,
100
+ "learning_rate": 0.00018500000000000002,
101
+ "loss": 1.1915,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 1.63,
106
+ "learning_rate": 0.00018391304347826088,
107
+ "loss": 1.1142,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 1.74,
112
  "learning_rate": 0.00018282608695652174,
113
+ "loss": 0.9908,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 1.85,
118
  "learning_rate": 0.00018173913043478262,
119
+ "loss": 0.9563,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 1.96,
124
  "learning_rate": 0.00018065217391304348,
125
+ "loss": 0.9761,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 2.07,
130
  "learning_rate": 0.00017956521739130436,
131
+ "loss": 1.0636,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 2.17,
136
  "learning_rate": 0.00017847826086956522,
137
+ "loss": 0.8646,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 2.17,
142
+ "eval_accuracy": 0.8884615384615384,
143
+ "eval_loss": 0.45421722531318665,
144
+ "eval_runtime": 2.2052,
145
+ "eval_samples_per_second": 117.904,
146
+ "eval_steps_per_second": 14.965,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 2.28,
151
  "learning_rate": 0.0001773913043478261,
152
+ "loss": 0.7407,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 2.39,
157
  "learning_rate": 0.00017630434782608695,
158
+ "loss": 0.8695,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 2.5,
163
  "learning_rate": 0.00017521739130434784,
164
+ "loss": 0.7102,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 2.61,
169
  "learning_rate": 0.00017413043478260872,
170
+ "loss": 0.9596,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 2.72,
175
  "learning_rate": 0.00017304347826086958,
176
+ "loss": 0.8462,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 2.83,
181
  "learning_rate": 0.00017195652173913043,
182
+ "loss": 0.8556,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 2.93,
187
  "learning_rate": 0.00017086956521739132,
188
+ "loss": 0.664,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 3.04,
193
  "learning_rate": 0.0001697826086956522,
194
+ "loss": 0.8349,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 3.15,
199
  "learning_rate": 0.00016869565217391306,
200
+ "loss": 0.6526,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 3.26,
205
  "learning_rate": 0.0001676086956521739,
206
+ "loss": 0.5485,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 3.26,
211
+ "eval_accuracy": 0.8538461538461538,
212
+ "eval_loss": 0.4102769196033478,
213
+ "eval_runtime": 1.7696,
214
+ "eval_samples_per_second": 146.93,
215
+ "eval_steps_per_second": 18.649,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 3.37,
220
+ "learning_rate": 0.0001665217391304348,
221
+ "loss": 0.701,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 3.48,
226
+ "learning_rate": 0.00016543478260869568,
227
+ "loss": 0.725,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 3.59,
232
+ "learning_rate": 0.00016434782608695653,
233
+ "loss": 0.7088,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 3.7,
238
+ "learning_rate": 0.0001632608695652174,
239
+ "loss": 0.5813,
240
  "step": 340
241
  },
242
  {
243
  "epoch": 3.8,
244
+ "learning_rate": 0.00016217391304347827,
245
+ "loss": 0.7197,
246
  "step": 350
247
  },
248
  {
249
  "epoch": 3.91,
250
+ "learning_rate": 0.00016108695652173913,
251
+ "loss": 0.7112,
252
  "step": 360
253
  },
254
  {
255
  "epoch": 4.02,
256
+ "learning_rate": 0.00016,
257
+ "loss": 0.6083,
258
  "step": 370
259
  },
260
  {
261
  "epoch": 4.13,
262
+ "learning_rate": 0.00015891304347826087,
263
+ "loss": 0.6653,
264
  "step": 380
265
  },
266
  {
267
  "epoch": 4.24,
268
+ "learning_rate": 0.00015782608695652175,
269
+ "loss": 0.6319,
270
  "step": 390
271
  },
272
  {
273
  "epoch": 4.35,
274
+ "learning_rate": 0.0001567391304347826,
275
+ "loss": 0.5082,
276
  "step": 400
277
  },
278
  {
279
  "epoch": 4.35,
280
+ "eval_accuracy": 0.8961538461538462,
281
+ "eval_loss": 0.29254817962646484,
282
+ "eval_runtime": 2.5312,
283
+ "eval_samples_per_second": 102.718,
284
+ "eval_steps_per_second": 13.037,
285
  "step": 400
286
  },
287
  {
288
  "epoch": 4.46,
289
+ "learning_rate": 0.00015565217391304346,
290
+ "loss": 0.6872,
291
  "step": 410
292
  },
293
  {
294
  "epoch": 4.57,
295
+ "learning_rate": 0.00015456521739130437,
296
+ "loss": 0.5159,
297
  "step": 420
298
  },
299
  {
300
  "epoch": 4.67,
301
+ "learning_rate": 0.00015347826086956523,
302
+ "loss": 0.4834,
303
  "step": 430
304
  },
305
  {
306
  "epoch": 4.78,
307
+ "learning_rate": 0.0001523913043478261,
308
+ "loss": 0.4467,
309
  "step": 440
310
  },
311
  {
312
  "epoch": 4.89,
313
+ "learning_rate": 0.00015130434782608694,
314
+ "loss": 0.5091,
315
  "step": 450
316
  },
317
  {
318
  "epoch": 5.0,
319
+ "learning_rate": 0.00015021739130434785,
320
+ "loss": 0.701,
321
  "step": 460
322
  },
323
  {
324
  "epoch": 5.11,
325
+ "learning_rate": 0.0001491304347826087,
326
+ "loss": 0.5314,
327
  "step": 470
328
  },
329
  {
330
  "epoch": 5.22,
331
+ "learning_rate": 0.00014804347826086957,
332
+ "loss": 0.5484,
333
  "step": 480
334
  },
335
  {
336
  "epoch": 5.33,
337
+ "learning_rate": 0.00014695652173913045,
338
+ "loss": 0.4856,
339
  "step": 490
340
  },
341
  {
342
  "epoch": 5.43,
343
+ "learning_rate": 0.0001458695652173913,
344
+ "loss": 0.5302,
345
  "step": 500
346
  },
347
  {
348
  "epoch": 5.43,
349
+ "eval_accuracy": 0.926923076923077,
350
+ "eval_loss": 0.24707049131393433,
351
+ "eval_runtime": 1.8365,
352
+ "eval_samples_per_second": 141.57,
353
+ "eval_steps_per_second": 17.969,
354
  "step": 500
355
  },
356
  {
357
  "epoch": 5.54,
358
+ "learning_rate": 0.0001447826086956522,
359
+ "loss": 0.4876,
360
  "step": 510
361
  },
362
  {
363
  "epoch": 5.65,
364
+ "learning_rate": 0.00014369565217391304,
365
+ "loss": 0.6267,
366
  "step": 520
367
  },
368
  {
369
  "epoch": 5.76,
370
+ "learning_rate": 0.00014260869565217393,
371
+ "loss": 0.5022,
372
  "step": 530
373
  },
374
  {
375
  "epoch": 5.87,
376
+ "learning_rate": 0.00014152173913043478,
377
+ "loss": 0.6259,
378
  "step": 540
379
  },
380
  {
381
  "epoch": 5.98,
382
+ "learning_rate": 0.00014043478260869567,
383
+ "loss": 0.588,
384
  "step": 550
385
  },
386
  {
387
  "epoch": 6.09,
388
+ "learning_rate": 0.00013934782608695652,
389
+ "loss": 0.6801,
390
  "step": 560
391
  },
392
  {
393
  "epoch": 6.2,
394
+ "learning_rate": 0.0001382608695652174,
395
+ "loss": 0.6062,
396
  "step": 570
397
  },
398
  {
399
  "epoch": 6.3,
400
+ "learning_rate": 0.00013717391304347826,
401
+ "loss": 0.3015,
402
  "step": 580
403
  },
404
  {
405
  "epoch": 6.41,
406
+ "learning_rate": 0.00013608695652173912,
407
+ "loss": 0.5071,
408
  "step": 590
409
  },
410
  {
411
  "epoch": 6.52,
412
+ "learning_rate": 0.00013500000000000003,
413
+ "loss": 0.4072,
414
  "step": 600
415
  },
416
  {
417
  "epoch": 6.52,
418
+ "eval_accuracy": 0.9230769230769231,
419
+ "eval_loss": 0.2676173746585846,
420
+ "eval_runtime": 2.5959,
421
+ "eval_samples_per_second": 100.157,
422
+ "eval_steps_per_second": 12.712,
423
  "step": 600
424
  },
425
  {
426
  "epoch": 6.63,
427
+ "learning_rate": 0.00013391304347826088,
428
+ "loss": 0.451,
429
  "step": 610
430
  },
431
  {
432
  "epoch": 6.74,
433
+ "learning_rate": 0.00013282608695652174,
434
+ "loss": 0.3797,
435
  "step": 620
436
  },
437
  {
438
  "epoch": 6.85,
439
+ "learning_rate": 0.0001317391304347826,
440
+ "loss": 0.415,
441
  "step": 630
442
  },
443
  {
444
  "epoch": 6.96,
445
+ "learning_rate": 0.00013065217391304348,
446
+ "loss": 0.4253,
447
  "step": 640
448
  },
449
  {
450
  "epoch": 7.07,
451
+ "learning_rate": 0.00012956521739130436,
452
+ "loss": 0.4366,
453
  "step": 650
454
  },
455
  {
456
  "epoch": 7.17,
457
+ "learning_rate": 0.00012847826086956522,
458
+ "loss": 0.3499,
459
  "step": 660
460
  },
461
  {
462
  "epoch": 7.28,
463
+ "learning_rate": 0.0001273913043478261,
464
+ "loss": 0.4012,
465
  "step": 670
466
  },
467
  {
468
  "epoch": 7.39,
469
+ "learning_rate": 0.00012630434782608696,
470
+ "loss": 0.414,
471
  "step": 680
472
  },
473
  {
474
  "epoch": 7.5,
475
+ "learning_rate": 0.00012521739130434784,
476
+ "loss": 0.5052,
477
  "step": 690
478
  },
479
  {
480
  "epoch": 7.61,
481
+ "learning_rate": 0.0001241304347826087,
482
+ "loss": 0.4424,
483
  "step": 700
484
  },
485
  {
486
  "epoch": 7.61,
487
+ "eval_accuracy": 0.9038461538461539,
488
+ "eval_loss": 0.4149817228317261,
489
+ "eval_runtime": 1.8169,
490
+ "eval_samples_per_second": 143.1,
491
+ "eval_steps_per_second": 18.163,
492
  "step": 700
493
  },
494
  {
495
  "epoch": 7.72,
496
+ "learning_rate": 0.00012304347826086958,
497
+ "loss": 0.5183,
498
  "step": 710
499
  },
500
  {
501
  "epoch": 7.83,
502
+ "learning_rate": 0.00012195652173913044,
503
+ "loss": 0.2976,
504
  "step": 720
505
  },
506
  {
507
  "epoch": 7.93,
508
+ "learning_rate": 0.00012086956521739131,
509
+ "loss": 0.5184,
510
  "step": 730
511
  },
512
  {
513
  "epoch": 8.04,
514
+ "learning_rate": 0.00011978260869565219,
515
+ "loss": 0.4194,
516
  "step": 740
517
  },
518
  {
519
  "epoch": 8.15,
520
+ "learning_rate": 0.00011869565217391305,
521
+ "loss": 0.3032,
522
  "step": 750
523
  },
524
  {
525
  "epoch": 8.26,
526
+ "learning_rate": 0.00011760869565217392,
527
+ "loss": 0.5633,
528
  "step": 760
529
  },
530
  {
531
  "epoch": 8.37,
532
+ "learning_rate": 0.00011652173913043479,
533
+ "loss": 0.3037,
534
  "step": 770
535
  },
536
  {
537
  "epoch": 8.48,
538
+ "learning_rate": 0.00011543478260869567,
539
+ "loss": 0.3617,
540
  "step": 780
541
  },
542
  {
543
  "epoch": 8.59,
544
+ "learning_rate": 0.00011434782608695654,
545
+ "loss": 0.4318,
546
  "step": 790
547
  },
548
  {
549
  "epoch": 8.7,
550
+ "learning_rate": 0.0001132608695652174,
551
+ "loss": 0.3409,
552
  "step": 800
553
  },
554
  {
555
  "epoch": 8.7,
556
+ "eval_accuracy": 0.9538461538461539,
557
+ "eval_loss": 0.19218704104423523,
558
+ "eval_runtime": 2.6334,
559
+ "eval_samples_per_second": 98.732,
560
+ "eval_steps_per_second": 12.531,
561
  "step": 800
562
  },
563
  {
564
  "epoch": 8.8,
565
+ "learning_rate": 0.00011217391304347826,
566
+ "loss": 0.2645,
567
  "step": 810
568
  },
569
  {
570
  "epoch": 8.91,
571
+ "learning_rate": 0.00011108695652173912,
572
+ "loss": 0.2775,
573
  "step": 820
574
  },
575
  {
576
  "epoch": 9.02,
577
+ "learning_rate": 0.00011000000000000002,
578
+ "loss": 0.4254,
579
  "step": 830
580
  },
581
  {
582
  "epoch": 9.13,
583
+ "learning_rate": 0.00010891304347826087,
584
+ "loss": 0.3383,
585
  "step": 840
586
  },
587
  {
588
  "epoch": 9.24,
589
+ "learning_rate": 0.00010782608695652174,
590
+ "loss": 0.3443,
591
  "step": 850
592
  },
593
  {
594
  "epoch": 9.35,
595
+ "learning_rate": 0.00010673913043478261,
596
+ "loss": 0.2455,
597
  "step": 860
598
  },
599
  {
600
  "epoch": 9.46,
601
+ "learning_rate": 0.00010565217391304347,
602
+ "loss": 0.2475,
603
  "step": 870
604
  },
605
  {
606
  "epoch": 9.57,
607
+ "learning_rate": 0.00010456521739130437,
608
+ "loss": 0.4743,
609
  "step": 880
610
  },
611
  {
612
  "epoch": 9.67,
613
+ "learning_rate": 0.00010347826086956522,
614
+ "loss": 0.4935,
615
  "step": 890
616
  },
617
  {
618
  "epoch": 9.78,
619
+ "learning_rate": 0.00010239130434782609,
620
+ "loss": 0.3046,
621
  "step": 900
622
  },
623
  {
624
  "epoch": 9.78,
625
+ "eval_accuracy": 0.9461538461538461,
626
+ "eval_loss": 0.1916845738887787,
627
+ "eval_runtime": 1.7809,
628
+ "eval_samples_per_second": 145.992,
629
+ "eval_steps_per_second": 18.53,
630
  "step": 900
631
  },
632
  {
633
  "epoch": 9.89,
634
+ "learning_rate": 0.00010130434782608695,
635
+ "loss": 0.2834,
636
  "step": 910
637
  },
638
  {
639
  "epoch": 10.0,
640
+ "learning_rate": 0.00010021739130434784,
641
+ "loss": 0.4026,
642
  "step": 920
643
  },
644
  {
645
  "epoch": 10.11,
646
+ "learning_rate": 9.91304347826087e-05,
647
+ "loss": 0.3004,
648
  "step": 930
649
  },
650
  {
651
  "epoch": 10.22,
652
+ "learning_rate": 9.804347826086957e-05,
653
+ "loss": 0.2161,
654
  "step": 940
655
  },
656
  {
657
  "epoch": 10.33,
658
+ "learning_rate": 9.695652173913044e-05,
659
+ "loss": 0.3859,
660
  "step": 950
661
  },
662
  {
663
  "epoch": 10.43,
664
+ "learning_rate": 9.586956521739131e-05,
665
+ "loss": 0.2911,
666
  "step": 960
667
  },
668
  {
669
  "epoch": 10.54,
670
+ "learning_rate": 9.478260869565218e-05,
671
+ "loss": 0.2693,
672
  "step": 970
673
  },
674
  {
675
  "epoch": 10.65,
676
+ "learning_rate": 9.369565217391305e-05,
677
+ "loss": 0.2366,
678
  "step": 980
679
  },
680
  {
681
  "epoch": 10.76,
682
+ "learning_rate": 9.260869565217392e-05,
683
+ "loss": 0.3161,
684
  "step": 990
685
  },
686
  {
687
  "epoch": 10.87,
688
+ "learning_rate": 9.152173913043479e-05,
689
+ "loss": 0.2911,
690
  "step": 1000
691
  },
692
  {
693
  "epoch": 10.87,
694
+ "eval_accuracy": 0.9423076923076923,
695
+ "eval_loss": 0.2271503061056137,
696
+ "eval_runtime": 2.5147,
697
+ "eval_samples_per_second": 103.391,
698
+ "eval_steps_per_second": 13.123,
699
  "step": 1000
700
  },
701
  {
702
  "epoch": 10.98,
703
+ "learning_rate": 9.043478260869566e-05,
704
+ "loss": 0.4276,
705
  "step": 1010
706
  },
707
  {
708
  "epoch": 11.09,
709
+ "learning_rate": 8.934782608695653e-05,
710
+ "loss": 0.245,
711
  "step": 1020
712
  },
713
  {
714
  "epoch": 11.2,
715
+ "learning_rate": 8.82608695652174e-05,
716
+ "loss": 0.2376,
717
  "step": 1030
718
  },
719
  {
720
  "epoch": 11.3,
721
+ "learning_rate": 8.717391304347827e-05,
722
+ "loss": 0.1843,
723
  "step": 1040
724
  },
725
  {
726
  "epoch": 11.41,
727
+ "learning_rate": 8.608695652173914e-05,
728
+ "loss": 0.3411,
729
  "step": 1050
730
  },
731
  {
732
  "epoch": 11.52,
733
+ "learning_rate": 8.5e-05,
734
+ "loss": 0.5081,
735
  "step": 1060
736
  },
737
  {
738
  "epoch": 11.63,
739
+ "learning_rate": 8.391304347826088e-05,
740
+ "loss": 0.2889,
741
  "step": 1070
742
  },
743
  {
744
  "epoch": 11.74,
745
+ "learning_rate": 8.282608695652175e-05,
746
+ "loss": 0.2588,
747
  "step": 1080
748
  },
749
  {
750
  "epoch": 11.85,
751
+ "learning_rate": 8.173913043478262e-05,
752
+ "loss": 0.1457,
753
  "step": 1090
754
  },
755
  {
756
  "epoch": 11.96,
757
  "learning_rate": 8.076086956521739e-05,
758
+ "loss": 0.269,
759
  "step": 1100
760
  },
761
  {
762
  "epoch": 11.96,
763
+ "eval_accuracy": 0.9692307692307692,
764
+ "eval_loss": 0.07221826165914536,
765
+ "eval_runtime": 1.7687,
766
+ "eval_samples_per_second": 146.998,
767
+ "eval_steps_per_second": 18.657,
768
  "step": 1100
769
  },
770
  {
771
  "epoch": 12.07,
772
  "learning_rate": 7.967391304347826e-05,
773
+ "loss": 0.3124,
774
  "step": 1110
775
  },
776
  {
777
  "epoch": 12.17,
778
  "learning_rate": 7.858695652173913e-05,
779
+ "loss": 0.1558,
780
  "step": 1120
781
  },
782
  {
783
  "epoch": 12.28,
784
  "learning_rate": 7.75e-05,
785
+ "loss": 0.3082,
786
  "step": 1130
787
  },
788
  {
789
  "epoch": 12.39,
790
  "learning_rate": 7.641304347826087e-05,
791
+ "loss": 0.2492,
792
  "step": 1140
793
  },
794
  {
795
  "epoch": 12.5,
796
  "learning_rate": 7.532608695652175e-05,
797
+ "loss": 0.3847,
798
  "step": 1150
799
  },
800
  {
801
  "epoch": 12.61,
802
  "learning_rate": 7.423913043478261e-05,
803
+ "loss": 0.3452,
804
  "step": 1160
805
  },
806
  {
807
  "epoch": 12.72,
808
+ "learning_rate": 7.315217391304349e-05,
809
+ "loss": 0.3171,
810
  "step": 1170
811
  },
812
  {
813
  "epoch": 12.83,
814
+ "learning_rate": 7.206521739130435e-05,
815
+ "loss": 0.3295,
816
  "step": 1180
817
  },
818
  {
819
  "epoch": 12.93,
820
+ "learning_rate": 7.097826086956522e-05,
821
+ "loss": 0.3387,
822
  "step": 1190
823
  },
824
  {
825
  "epoch": 13.04,
826
+ "learning_rate": 6.989130434782609e-05,
827
+ "loss": 0.3709,
828
  "step": 1200
829
  },
830
  {
831
  "epoch": 13.04,
832
+ "eval_accuracy": 0.9653846153846154,
833
+ "eval_loss": 0.14726483821868896,
834
+ "eval_runtime": 2.5212,
835
+ "eval_samples_per_second": 103.126,
836
+ "eval_steps_per_second": 13.089,
837
  "step": 1200
838
  },
839
  {
840
  "epoch": 13.15,
841
+ "learning_rate": 6.880434782608696e-05,
842
+ "loss": 0.3647,
843
  "step": 1210
844
  },
845
  {
846
  "epoch": 13.26,
847
+ "learning_rate": 6.771739130434783e-05,
848
+ "loss": 0.137,
849
  "step": 1220
850
  },
851
  {
852
  "epoch": 13.37,
853
+ "learning_rate": 6.66304347826087e-05,
854
+ "loss": 0.3562,
855
  "step": 1230
856
  },
857
  {
858
  "epoch": 13.48,
859
+ "learning_rate": 6.554347826086957e-05,
860
+ "loss": 0.323,
861
  "step": 1240
862
  },
863
  {
864
  "epoch": 13.59,
865
+ "learning_rate": 6.445652173913044e-05,
866
+ "loss": 0.2173,
867
  "step": 1250
868
  },
869
  {
870
  "epoch": 13.7,
871
+ "learning_rate": 6.33695652173913e-05,
872
+ "loss": 0.315,
873
  "step": 1260
874
  },
875
  {
876
  "epoch": 13.8,
877
+ "learning_rate": 6.228260869565218e-05,
878
+ "loss": 0.274,
879
  "step": 1270
880
  },
881
  {
882
  "epoch": 13.91,
883
+ "learning_rate": 6.119565217391304e-05,
884
+ "loss": 0.1757,
885
  "step": 1280
886
  },
887
  {
888
  "epoch": 14.02,
889
+ "learning_rate": 6.010869565217392e-05,
890
+ "loss": 0.1968,
891
  "step": 1290
892
  },
893
  {
894
  "epoch": 14.13,
895
+ "learning_rate": 5.9021739130434784e-05,
896
+ "loss": 0.3443,
897
  "step": 1300
898
  },
899
  {
900
  "epoch": 14.13,
901
+ "eval_accuracy": 0.9615384615384616,
902
+ "eval_loss": 0.15447035431861877,
903
+ "eval_runtime": 1.7932,
904
+ "eval_samples_per_second": 144.988,
905
+ "eval_steps_per_second": 18.402,
906
  "step": 1300
907
  },
908
  {
909
  "epoch": 14.24,
910
+ "learning_rate": 5.793478260869566e-05,
911
+ "loss": 0.225,
912
  "step": 1310
913
  },
914
  {
915
  "epoch": 14.35,
916
+ "learning_rate": 5.6847826086956524e-05,
917
+ "loss": 0.336,
918
  "step": 1320
919
  },
920
  {
921
  "epoch": 14.46,
922
+ "learning_rate": 5.5760869565217386e-05,
923
+ "loss": 0.2073,
924
  "step": 1330
925
  },
926
  {
927
  "epoch": 14.57,
928
+ "learning_rate": 5.467391304347826e-05,
929
+ "loss": 0.2144,
930
  "step": 1340
931
  },
932
  {
933
  "epoch": 14.67,
934
+ "learning_rate": 5.3586956521739126e-05,
935
+ "loss": 0.3399,
936
  "step": 1350
937
  },
938
  {
939
  "epoch": 14.78,
940
+ "learning_rate": 5.25e-05,
941
+ "loss": 0.2516,
942
  "step": 1360
943
  },
944
  {
945
  "epoch": 14.89,
946
+ "learning_rate": 5.141304347826087e-05,
947
+ "loss": 0.2494,
948
  "step": 1370
949
  },
950
  {
951
  "epoch": 15.0,
952
+ "learning_rate": 5.032608695652175e-05,
953
+ "loss": 0.2213,
954
  "step": 1380
955
  },
956
  {
957
  "epoch": 15.11,
958
+ "learning_rate": 4.923913043478261e-05,
959
+ "loss": 0.1834,
960
  "step": 1390
961
  },
962
  {
963
  "epoch": 15.22,
964
+ "learning_rate": 4.815217391304348e-05,
965
+ "loss": 0.187,
966
  "step": 1400
967
  },
968
  {
969
  "epoch": 15.22,
970
  "eval_accuracy": 0.9730769230769231,
971
+ "eval_loss": 0.10598226636648178,
972
+ "eval_runtime": 2.4502,
973
+ "eval_samples_per_second": 106.116,
974
+ "eval_steps_per_second": 13.469,
975
  "step": 1400
976
  },
977
  {
978
  "epoch": 15.33,
979
+ "learning_rate": 4.706521739130435e-05,
980
+ "loss": 0.1387,
981
  "step": 1410
982
  },
983
  {
984
  "epoch": 15.43,
985
+ "learning_rate": 4.597826086956522e-05,
986
+ "loss": 0.1653,
987
  "step": 1420
988
  },
989
  {
990
  "epoch": 15.54,
991
+ "learning_rate": 4.489130434782609e-05,
992
+ "loss": 0.2012,
993
  "step": 1430
994
  },
995
  {
996
  "epoch": 15.65,
997
+ "learning_rate": 4.380434782608696e-05,
998
+ "loss": 0.224,
999
  "step": 1440
1000
  },
1001
  {
1002
  "epoch": 15.76,
1003
+ "learning_rate": 4.271739130434783e-05,
1004
+ "loss": 0.3341,
1005
  "step": 1450
1006
  },
1007
  {
1008
  "epoch": 15.87,
1009
+ "learning_rate": 4.16304347826087e-05,
1010
+ "loss": 0.3279,
1011
  "step": 1460
1012
  },
1013
  {
1014
  "epoch": 15.98,
1015
+ "learning_rate": 4.054347826086957e-05,
1016
+ "loss": 0.2218,
1017
  "step": 1470
1018
  },
1019
  {
1020
  "epoch": 16.09,
1021
+ "learning_rate": 3.945652173913044e-05,
1022
+ "loss": 0.2184,
1023
  "step": 1480
1024
  },
1025
  {
1026
  "epoch": 16.2,
1027
+ "learning_rate": 3.836956521739131e-05,
1028
+ "loss": 0.2069,
1029
  "step": 1490
1030
  },
1031
  {
1032
  "epoch": 16.3,
1033
+ "learning_rate": 3.728260869565218e-05,
1034
+ "loss": 0.1879,
1035
  "step": 1500
1036
  },
1037
  {
1038
  "epoch": 16.3,
1039
+ "eval_accuracy": 0.9692307692307692,
1040
+ "eval_loss": 0.11237060278654099,
1041
+ "eval_runtime": 1.8389,
1042
+ "eval_samples_per_second": 141.393,
1043
+ "eval_steps_per_second": 17.946,
1044
  "step": 1500
1045
  },
1046
  {
1047
  "epoch": 16.41,
1048
+ "learning_rate": 3.619565217391305e-05,
1049
+ "loss": 0.1226,
1050
  "step": 1510
1051
  },
1052
  {
1053
  "epoch": 16.52,
1054
+ "learning_rate": 3.510869565217392e-05,
1055
+ "loss": 0.1394,
1056
  "step": 1520
1057
  },
1058
  {
1059
  "epoch": 16.63,
1060
+ "learning_rate": 3.402173913043478e-05,
1061
+ "loss": 0.2203,
1062
  "step": 1530
1063
  },
1064
  {
1065
  "epoch": 16.74,
1066
+ "learning_rate": 3.293478260869565e-05,
1067
+ "loss": 0.3049,
1068
  "step": 1540
1069
  },
1070
  {
1071
  "epoch": 16.85,
1072
+ "learning_rate": 3.1847826086956526e-05,
1073
+ "loss": 0.1622,
1074
  "step": 1550
1075
  },
1076
  {
1077
  "epoch": 16.96,
1078
+ "learning_rate": 3.0760869565217395e-05,
1079
+ "loss": 0.2375,
1080
  "step": 1560
1081
  },
1082
  {
1083
  "epoch": 17.07,
1084
+ "learning_rate": 2.967391304347826e-05,
1085
+ "loss": 0.123,
1086
  "step": 1570
1087
  },
1088
  {
1089
  "epoch": 17.17,
1090
+ "learning_rate": 2.8586956521739135e-05,
1091
+ "loss": 0.1922,
1092
  "step": 1580
1093
  },
1094
  {
1095
  "epoch": 17.28,
1096
+ "learning_rate": 2.7500000000000004e-05,
1097
+ "loss": 0.2794,
1098
  "step": 1590
1099
  },
1100
  {
1101
  "epoch": 17.39,
1102
+ "learning_rate": 2.6413043478260867e-05,
1103
+ "loss": 0.2183,
1104
  "step": 1600
1105
  },
1106
  {
1107
  "epoch": 17.39,
1108
+ "eval_accuracy": 0.9615384615384616,
1109
+ "eval_loss": 0.13765688240528107,
1110
+ "eval_runtime": 2.3734,
1111
+ "eval_samples_per_second": 109.546,
1112
+ "eval_steps_per_second": 13.904,
1113
  "step": 1600
1114
  },
1115
  {
1116
  "epoch": 17.5,
1117
+ "learning_rate": 2.5326086956521737e-05,
1118
+ "loss": 0.1748,
1119
  "step": 1610
1120
  },
1121
  {
1122
  "epoch": 17.61,
1123
+ "learning_rate": 2.423913043478261e-05,
1124
+ "loss": 0.2382,
1125
  "step": 1620
1126
  },
1127
  {
1128
  "epoch": 17.72,
1129
+ "learning_rate": 2.315217391304348e-05,
1130
+ "loss": 0.2712,
1131
  "step": 1630
1132
  },
1133
  {
1134
  "epoch": 17.83,
1135
+ "learning_rate": 2.206521739130435e-05,
1136
+ "loss": 0.2208,
1137
  "step": 1640
1138
  },
1139
  {
1140
  "epoch": 17.93,
1141
+ "learning_rate": 2.097826086956522e-05,
1142
+ "loss": 0.1639,
1143
  "step": 1650
1144
  },
1145
  {
1146
  "epoch": 18.04,
1147
+ "learning_rate": 1.9891304347826085e-05,
1148
+ "loss": 0.1006,
1149
  "step": 1660
1150
  },
1151
  {
1152
  "epoch": 18.15,
1153
+ "learning_rate": 1.8804347826086958e-05,
1154
+ "loss": 0.1697,
1155
  "step": 1670
1156
  },
1157
  {
1158
  "epoch": 18.26,
1159
+ "learning_rate": 1.7717391304347828e-05,
1160
+ "loss": 0.1967,
1161
  "step": 1680
1162
  },
1163
  {
1164
  "epoch": 18.37,
1165
+ "learning_rate": 1.6630434782608698e-05,
1166
  "loss": 0.1897,
1167
  "step": 1690
1168
  },
1169
  {
1170
  "epoch": 18.48,
1171
+ "learning_rate": 1.5543478260869564e-05,
1172
+ "loss": 0.1478,
1173
  "step": 1700
1174
  },
1175
  {
1176
  "epoch": 18.48,
1177
+ "eval_accuracy": 0.9769230769230769,
1178
+ "eval_loss": 0.12689848244190216,
1179
+ "eval_runtime": 1.8016,
1180
+ "eval_samples_per_second": 144.314,
1181
+ "eval_steps_per_second": 18.317,
1182
  "step": 1700
1183
  },
1184
  {
1185
  "epoch": 18.59,
1186
+ "learning_rate": 1.4456521739130435e-05,
1187
+ "loss": 0.1515,
1188
  "step": 1710
1189
  },
1190
  {
1191
  "epoch": 18.7,
1192
+ "learning_rate": 1.3369565217391305e-05,
1193
+ "loss": 0.2222,
1194
  "step": 1720
1195
  },
1196
  {
1197
  "epoch": 18.8,
1198
+ "learning_rate": 1.2282608695652175e-05,
1199
+ "loss": 0.201,
1200
  "step": 1730
1201
  },
1202
  {
1203
  "epoch": 18.91,
1204
+ "learning_rate": 1.1195652173913044e-05,
1205
+ "loss": 0.1389,
1206
  "step": 1740
1207
  },
1208
  {
1209
  "epoch": 19.02,
1210
+ "learning_rate": 1.0108695652173914e-05,
1211
+ "loss": 0.1495,
1212
  "step": 1750
1213
  },
1214
  {
1215
  "epoch": 19.13,
1216
+ "learning_rate": 9.021739130434784e-06,
1217
+ "loss": 0.2616,
1218
  "step": 1760
1219
  },
1220
  {
1221
  "epoch": 19.24,
1222
+ "learning_rate": 7.934782608695653e-06,
1223
+ "loss": 0.1864,
1224
  "step": 1770
1225
  },
1226
  {
1227
  "epoch": 19.35,
1228
+ "learning_rate": 6.847826086956521e-06,
1229
+ "loss": 0.1395,
1230
  "step": 1780
1231
  },
1232
  {
1233
  "epoch": 19.46,
1234
+ "learning_rate": 5.760869565217392e-06,
1235
+ "loss": 0.1486,
1236
  "step": 1790
1237
  },
1238
  {
1239
  "epoch": 19.57,
1240
+ "learning_rate": 4.673913043478261e-06,
1241
+ "loss": 0.1944,
1242
  "step": 1800
1243
  },
1244
  {
1245
  "epoch": 19.57,
1246
  "eval_accuracy": 0.9769230769230769,
1247
+ "eval_loss": 0.09090477973222733,
1248
+ "eval_runtime": 2.3489,
1249
+ "eval_samples_per_second": 110.691,
1250
+ "eval_steps_per_second": 14.049,
1251
  "step": 1800
1252
  },
1253
  {
1254
  "epoch": 19.67,
1255
+ "learning_rate": 3.5869565217391305e-06,
1256
+ "loss": 0.1359,
1257
  "step": 1810
1258
  },
1259
  {
1260
  "epoch": 19.78,
1261
+ "learning_rate": 2.5e-06,
1262
+ "loss": 0.143,
1263
  "step": 1820
1264
  },
1265
  {
1266
  "epoch": 19.89,
1267
+ "learning_rate": 1.4130434782608697e-06,
1268
+ "loss": 0.1368,
1269
  "step": 1830
1270
  },
1271
  {
1272
  "epoch": 20.0,
1273
+ "learning_rate": 3.260869565217391e-07,
1274
+ "loss": 0.162,
1275
  "step": 1840
1276
  },
1277
  {
1278
  "epoch": 20.0,
1279
  "step": 1840,
1280
  "total_flos": 7.30261234607063e+17,
1281
+ "train_loss": 0.5132673807766126,
1282
+ "train_runtime": 395.7262,
1283
+ "train_samples_per_second": 74.193,
1284
+ "train_steps_per_second": 4.65
1285
  }
1286
  ],
1287
  "logging_steps": 10,