nandyc commited on
Commit
6cda71c
1 Parent(s): 00b156e

🍻 cheers

Browse files
Files changed (5) hide show
  1. README.md +4 -3
  2. all_results.json +11 -11
  3. eval_results.json +6 -6
  4. train_results.json +6 -6
  5. trainer_state.json +472 -2380
README.md CHANGED
@@ -2,6 +2,7 @@
2
  license: apache-2.0
3
  base_model: microsoft/swin-tiny-patch4-window7-224
4
  tags:
 
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
@@ -15,10 +16,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # swin-tiny-patch4-window7-224-finetuned_ASL_Isolated_Swin_dataset2
17
 
18
- This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.0504
21
- - Accuracy: 0.9769
22
 
23
  ## Model description
24
 
 
2
  license: apache-2.0
3
  base_model: microsoft/swin-tiny-patch4-window7-224
4
  tags:
5
+ - image-classification
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
 
16
 
17
  # swin-tiny-patch4-window7-224-finetuned_ASL_Isolated_Swin_dataset2
18
 
19
+ This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the ASL_Isolated_Swin_dataset dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.0558
22
+ - Accuracy: 0.9846
23
 
24
  ## Model description
25
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_accuracy": 0.9884615384615385,
4
- "eval_loss": 0.12455728650093079,
5
- "eval_runtime": 1.9696,
6
- "eval_samples_per_second": 132.009,
7
- "eval_steps_per_second": 16.755,
8
- "total_flos": 1.8256530865176576e+18,
9
- "train_loss": 0.3526626825786155,
10
- "train_runtime": 995.2393,
11
- "train_samples_per_second": 73.751,
12
- "train_steps_per_second": 4.622
13
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.9846153846153847,
4
+ "eval_loss": 0.05584708973765373,
5
+ "eval_runtime": 2.0579,
6
+ "eval_samples_per_second": 126.342,
7
+ "eval_steps_per_second": 16.036,
8
+ "total_flos": 7.30261234607063e+17,
9
+ "train_loss": 0.5342570722103119,
10
+ "train_runtime": 409.3983,
11
+ "train_samples_per_second": 71.715,
12
+ "train_steps_per_second": 4.494
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_accuracy": 0.9884615384615385,
4
- "eval_loss": 0.12455728650093079,
5
- "eval_runtime": 1.9696,
6
- "eval_samples_per_second": 132.009,
7
- "eval_steps_per_second": 16.755
8
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.9846153846153847,
4
+ "eval_loss": 0.05584708973765373,
5
+ "eval_runtime": 2.0579,
6
+ "eval_samples_per_second": 126.342,
7
+ "eval_steps_per_second": 16.036
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "total_flos": 1.8256530865176576e+18,
4
- "train_loss": 0.3526626825786155,
5
- "train_runtime": 995.2393,
6
- "train_samples_per_second": 73.751,
7
- "train_steps_per_second": 4.622
8
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "total_flos": 7.30261234607063e+17,
4
+ "train_loss": 0.5342570722103119,
5
+ "train_runtime": 409.3983,
6
+ "train_samples_per_second": 71.715,
7
+ "train_steps_per_second": 4.494
8
  }
trainer_state.json CHANGED
@@ -1,3202 +1,1294 @@
1
  {
2
- "best_metric": 0.9884615384615385,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned_ASL_Isolated_Swin_dataset2/checkpoint-3600",
4
- "epoch": 50.0,
5
  "eval_steps": 100,
6
- "global_step": 4600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.11,
13
- "learning_rate": 0.00019960869565217393,
14
- "loss": 3.3054,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.22,
19
- "learning_rate": 0.00019917391304347828,
20
- "loss": 3.1481,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.33,
25
- "learning_rate": 0.00019873913043478263,
26
- "loss": 3.0688,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.43,
31
- "learning_rate": 0.00019830434782608695,
32
- "loss": 2.7014,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.54,
37
- "learning_rate": 0.0001978695652173913,
38
- "loss": 2.2834,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.65,
43
- "learning_rate": 0.00019743478260869564,
44
- "loss": 2.0143,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.76,
49
- "learning_rate": 0.00019700000000000002,
50
- "loss": 1.7308,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.87,
55
- "learning_rate": 0.0001966086956521739,
56
- "loss": 1.5816,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.98,
61
- "learning_rate": 0.0001961739130434783,
62
- "loss": 1.5232,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 1.09,
67
- "learning_rate": 0.00019573913043478263,
68
- "loss": 1.4683,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 1.09,
73
- "eval_accuracy": 0.5692307692307692,
74
- "eval_loss": 1.1744574308395386,
75
- "eval_runtime": 1.9194,
76
- "eval_samples_per_second": 135.456,
77
- "eval_steps_per_second": 17.193,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 1.2,
82
- "learning_rate": 0.00019530434782608698,
83
- "loss": 1.1985,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 1.3,
88
- "learning_rate": 0.0001948695652173913,
89
- "loss": 1.2874,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 1.41,
94
- "learning_rate": 0.00019443478260869565,
95
- "loss": 1.1879,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 1.52,
100
- "learning_rate": 0.000194,
101
- "loss": 1.0703,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 1.63,
106
- "learning_rate": 0.00019356521739130435,
107
- "loss": 0.935,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 1.74,
112
- "learning_rate": 0.0001931304347826087,
113
- "loss": 1.003,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 1.85,
118
- "learning_rate": 0.00019269565217391304,
119
- "loss": 0.8789,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 1.96,
124
- "learning_rate": 0.00019226086956521742,
125
- "loss": 1.0651,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 2.07,
130
- "learning_rate": 0.00019182608695652177,
131
- "loss": 1.106,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 2.17,
136
- "learning_rate": 0.00019139130434782611,
137
- "loss": 1.0278,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 2.17,
142
- "eval_accuracy": 0.8384615384615385,
143
- "eval_loss": 0.5679711103439331,
144
- "eval_runtime": 2.3986,
145
- "eval_samples_per_second": 108.398,
146
- "eval_steps_per_second": 13.758,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 2.28,
151
- "learning_rate": 0.00019095652173913043,
152
- "loss": 0.8058,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 2.39,
157
- "learning_rate": 0.00019052173913043478,
158
- "loss": 1.0418,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 2.5,
163
- "learning_rate": 0.00019008695652173913,
164
- "loss": 0.7689,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 2.61,
169
- "learning_rate": 0.00018965217391304348,
170
- "loss": 1.0218,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 2.72,
175
- "learning_rate": 0.00018921739130434783,
176
- "loss": 0.8868,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 2.83,
181
- "learning_rate": 0.00018878260869565217,
182
- "loss": 0.7203,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 2.93,
187
- "learning_rate": 0.00018834782608695655,
188
- "loss": 0.6535,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 3.04,
193
- "learning_rate": 0.0001879130434782609,
194
- "loss": 0.7889,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 3.15,
199
- "learning_rate": 0.00018747826086956524,
200
- "loss": 0.7151,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 3.26,
205
- "learning_rate": 0.00018704347826086957,
206
- "loss": 0.6458,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 3.26,
211
- "eval_accuracy": 0.8807692307692307,
212
- "eval_loss": 0.2887081503868103,
213
- "eval_runtime": 2.4065,
214
- "eval_samples_per_second": 108.039,
215
- "eval_steps_per_second": 13.713,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 3.37,
220
- "learning_rate": 0.0001866086956521739,
221
- "loss": 0.7461,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 3.48,
226
- "learning_rate": 0.00018617391304347826,
227
- "loss": 0.9252,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 3.59,
232
- "learning_rate": 0.0001857391304347826,
233
- "loss": 0.8101,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 3.7,
238
- "learning_rate": 0.00018530434782608696,
239
- "loss": 0.6978,
240
  "step": 340
241
  },
242
  {
243
  "epoch": 3.8,
244
- "learning_rate": 0.0001848695652173913,
245
- "loss": 0.8036,
246
  "step": 350
247
  },
248
  {
249
  "epoch": 3.91,
250
- "learning_rate": 0.00018443478260869568,
251
- "loss": 0.8744,
252
  "step": 360
253
  },
254
  {
255
  "epoch": 4.02,
256
- "learning_rate": 0.00018400000000000003,
257
- "loss": 0.7999,
258
  "step": 370
259
  },
260
  {
261
  "epoch": 4.13,
262
- "learning_rate": 0.00018356521739130438,
263
- "loss": 0.6261,
264
  "step": 380
265
  },
266
  {
267
  "epoch": 4.24,
268
- "learning_rate": 0.0001831304347826087,
269
- "loss": 0.6083,
270
  "step": 390
271
  },
272
  {
273
  "epoch": 4.35,
274
- "learning_rate": 0.00018269565217391304,
275
- "loss": 0.6043,
276
  "step": 400
277
  },
278
  {
279
  "epoch": 4.35,
280
- "eval_accuracy": 0.9384615384615385,
281
- "eval_loss": 0.24507971107959747,
282
- "eval_runtime": 2.5346,
283
- "eval_samples_per_second": 102.582,
284
- "eval_steps_per_second": 13.02,
285
  "step": 400
286
  },
287
  {
288
  "epoch": 4.46,
289
- "learning_rate": 0.0001822608695652174,
290
- "loss": 0.5909,
291
  "step": 410
292
  },
293
  {
294
  "epoch": 4.57,
295
- "learning_rate": 0.00018182608695652174,
296
- "loss": 0.6176,
297
  "step": 420
298
  },
299
  {
300
  "epoch": 4.67,
301
- "learning_rate": 0.0001813913043478261,
302
- "loss": 0.4938,
303
  "step": 430
304
  },
305
  {
306
  "epoch": 4.78,
307
- "learning_rate": 0.00018095652173913044,
308
- "loss": 0.6233,
309
  "step": 440
310
  },
311
  {
312
  "epoch": 4.89,
313
- "learning_rate": 0.0001805217391304348,
314
- "loss": 0.5976,
315
  "step": 450
316
  },
317
  {
318
  "epoch": 5.0,
319
- "learning_rate": 0.00018008695652173916,
320
- "loss": 0.6603,
321
  "step": 460
322
  },
323
  {
324
  "epoch": 5.11,
325
- "learning_rate": 0.0001796521739130435,
326
- "loss": 0.5788,
327
  "step": 470
328
  },
329
  {
330
  "epoch": 5.22,
331
- "learning_rate": 0.00017921739130434783,
332
- "loss": 0.5944,
333
  "step": 480
334
  },
335
  {
336
  "epoch": 5.33,
337
- "learning_rate": 0.00017878260869565217,
338
- "loss": 0.5945,
339
  "step": 490
340
  },
341
  {
342
  "epoch": 5.43,
343
- "learning_rate": 0.00017834782608695652,
344
- "loss": 0.5372,
345
  "step": 500
346
  },
347
  {
348
  "epoch": 5.43,
349
- "eval_accuracy": 0.9230769230769231,
350
- "eval_loss": 0.2226274162530899,
351
- "eval_runtime": 1.8172,
352
- "eval_samples_per_second": 143.077,
353
- "eval_steps_per_second": 18.16,
354
  "step": 500
355
  },
356
  {
357
  "epoch": 5.54,
358
- "learning_rate": 0.00017791304347826087,
359
- "loss": 0.5606,
360
  "step": 510
361
  },
362
  {
363
  "epoch": 5.65,
364
- "learning_rate": 0.00017747826086956522,
365
- "loss": 0.6493,
366
  "step": 520
367
  },
368
  {
369
  "epoch": 5.76,
370
- "learning_rate": 0.00017704347826086957,
371
- "loss": 0.4905,
372
  "step": 530
373
  },
374
  {
375
  "epoch": 5.87,
376
- "learning_rate": 0.00017660869565217394,
377
- "loss": 0.8186,
378
  "step": 540
379
  },
380
  {
381
  "epoch": 5.98,
382
- "learning_rate": 0.0001761739130434783,
383
- "loss": 0.612,
384
  "step": 550
385
  },
386
  {
387
  "epoch": 6.09,
388
- "learning_rate": 0.0001757391304347826,
389
- "loss": 0.7152,
390
  "step": 560
391
  },
392
  {
393
  "epoch": 6.2,
394
- "learning_rate": 0.00017530434782608696,
395
- "loss": 0.5914,
396
  "step": 570
397
  },
398
  {
399
  "epoch": 6.3,
400
- "learning_rate": 0.0001748695652173913,
401
- "loss": 0.3955,
402
  "step": 580
403
  },
404
  {
405
  "epoch": 6.41,
406
- "learning_rate": 0.00017443478260869565,
407
- "loss": 0.5577,
408
  "step": 590
409
  },
410
  {
411
  "epoch": 6.52,
412
- "learning_rate": 0.000174,
413
- "loss": 0.4619,
414
  "step": 600
415
  },
416
  {
417
  "epoch": 6.52,
418
- "eval_accuracy": 0.9115384615384615,
419
- "eval_loss": 0.3454606533050537,
420
- "eval_runtime": 2.2659,
421
- "eval_samples_per_second": 114.745,
422
- "eval_steps_per_second": 14.564,
423
  "step": 600
424
  },
425
  {
426
  "epoch": 6.63,
427
- "learning_rate": 0.00017356521739130435,
428
- "loss": 0.5097,
429
  "step": 610
430
  },
431
  {
432
  "epoch": 6.74,
433
- "learning_rate": 0.0001731304347826087,
434
- "loss": 0.4338,
435
  "step": 620
436
  },
437
  {
438
  "epoch": 6.85,
439
- "learning_rate": 0.00017269565217391307,
440
- "loss": 0.4486,
441
  "step": 630
442
  },
443
  {
444
  "epoch": 6.96,
445
- "learning_rate": 0.00017226086956521742,
446
- "loss": 0.5887,
447
  "step": 640
448
  },
449
  {
450
  "epoch": 7.07,
451
- "learning_rate": 0.00017182608695652174,
452
- "loss": 0.5752,
453
  "step": 650
454
  },
455
  {
456
  "epoch": 7.17,
457
- "learning_rate": 0.0001713913043478261,
458
- "loss": 0.4971,
459
  "step": 660
460
  },
461
  {
462
  "epoch": 7.28,
463
- "learning_rate": 0.00017095652173913044,
464
- "loss": 0.3792,
465
  "step": 670
466
  },
467
  {
468
  "epoch": 7.39,
469
- "learning_rate": 0.00017052173913043478,
470
- "loss": 0.2748,
471
  "step": 680
472
  },
473
  {
474
  "epoch": 7.5,
475
- "learning_rate": 0.00017008695652173913,
476
- "loss": 0.5585,
477
  "step": 690
478
  },
479
  {
480
  "epoch": 7.61,
481
- "learning_rate": 0.00016965217391304348,
482
- "loss": 0.5265,
483
  "step": 700
484
  },
485
  {
486
  "epoch": 7.61,
487
- "eval_accuracy": 0.9153846153846154,
488
- "eval_loss": 0.31777822971343994,
489
- "eval_runtime": 1.8523,
490
- "eval_samples_per_second": 140.366,
491
- "eval_steps_per_second": 17.816,
492
  "step": 700
493
  },
494
  {
495
  "epoch": 7.72,
496
- "learning_rate": 0.00016921739130434783,
497
- "loss": 0.6863,
498
  "step": 710
499
  },
500
  {
501
  "epoch": 7.83,
502
- "learning_rate": 0.0001687826086956522,
503
- "loss": 0.3957,
504
  "step": 720
505
  },
506
  {
507
  "epoch": 7.93,
508
- "learning_rate": 0.00016834782608695655,
509
- "loss": 0.5072,
510
  "step": 730
511
  },
512
  {
513
  "epoch": 8.04,
514
- "learning_rate": 0.00016791304347826087,
515
- "loss": 0.4274,
516
  "step": 740
517
  },
518
  {
519
  "epoch": 8.15,
520
- "learning_rate": 0.00016747826086956522,
521
- "loss": 0.3224,
522
  "step": 750
523
  },
524
  {
525
  "epoch": 8.26,
526
- "learning_rate": 0.00016704347826086957,
527
- "loss": 0.5873,
528
  "step": 760
529
  },
530
  {
531
  "epoch": 8.37,
532
- "learning_rate": 0.00016660869565217391,
533
- "loss": 0.4524,
534
  "step": 770
535
  },
536
  {
537
  "epoch": 8.48,
538
- "learning_rate": 0.00016617391304347826,
539
- "loss": 0.3896,
540
  "step": 780
541
  },
542
  {
543
  "epoch": 8.59,
544
- "learning_rate": 0.0001657391304347826,
545
- "loss": 0.5826,
546
  "step": 790
547
  },
548
  {
549
  "epoch": 8.7,
550
- "learning_rate": 0.00016530434782608696,
551
- "loss": 0.284,
552
  "step": 800
553
  },
554
  {
555
  "epoch": 8.7,
556
- "eval_accuracy": 0.8884615384615384,
557
- "eval_loss": 0.44208580255508423,
558
- "eval_runtime": 1.8545,
559
- "eval_samples_per_second": 140.199,
560
- "eval_steps_per_second": 17.795,
561
  "step": 800
562
  },
563
  {
564
  "epoch": 8.8,
565
- "learning_rate": 0.0001648695652173913,
566
- "loss": 0.4775,
567
  "step": 810
568
  },
569
  {
570
  "epoch": 8.91,
571
- "learning_rate": 0.00016443478260869568,
572
- "loss": 0.3023,
573
  "step": 820
574
  },
575
  {
576
  "epoch": 9.02,
577
- "learning_rate": 0.000164,
578
- "loss": 0.5154,
579
  "step": 830
580
  },
581
  {
582
  "epoch": 9.13,
583
- "learning_rate": 0.00016356521739130435,
584
- "loss": 0.4916,
585
  "step": 840
586
  },
587
  {
588
  "epoch": 9.24,
589
- "learning_rate": 0.0001631304347826087,
590
- "loss": 0.4106,
591
  "step": 850
592
  },
593
  {
594
  "epoch": 9.35,
595
- "learning_rate": 0.00016269565217391305,
596
- "loss": 0.2205,
597
  "step": 860
598
  },
599
  {
600
  "epoch": 9.46,
601
- "learning_rate": 0.0001622608695652174,
602
- "loss": 0.3173,
603
  "step": 870
604
  },
605
  {
606
  "epoch": 9.57,
607
- "learning_rate": 0.00016182608695652174,
608
- "loss": 0.5083,
609
  "step": 880
610
  },
611
  {
612
  "epoch": 9.67,
613
- "learning_rate": 0.0001613913043478261,
614
- "loss": 0.5707,
615
  "step": 890
616
  },
617
  {
618
  "epoch": 9.78,
619
- "learning_rate": 0.00016095652173913044,
620
- "loss": 0.4421,
621
  "step": 900
622
  },
623
  {
624
  "epoch": 9.78,
625
- "eval_accuracy": 0.9307692307692308,
626
- "eval_loss": 0.22608502209186554,
627
- "eval_runtime": 2.0734,
628
- "eval_samples_per_second": 125.399,
629
- "eval_steps_per_second": 15.916,
630
  "step": 900
631
  },
632
  {
633
  "epoch": 9.89,
634
- "learning_rate": 0.0001605217391304348,
635
- "loss": 0.4098,
636
  "step": 910
637
  },
638
  {
639
  "epoch": 10.0,
640
- "learning_rate": 0.00016008695652173913,
641
- "loss": 0.4572,
642
  "step": 920
643
  },
644
  {
645
  "epoch": 10.11,
646
- "learning_rate": 0.00015965217391304348,
647
- "loss": 0.3418,
648
  "step": 930
649
  },
650
  {
651
  "epoch": 10.22,
652
- "learning_rate": 0.00015921739130434783,
653
- "loss": 0.2752,
654
  "step": 940
655
  },
656
  {
657
  "epoch": 10.33,
658
- "learning_rate": 0.00015878260869565218,
659
- "loss": 0.4817,
660
  "step": 950
661
  },
662
  {
663
  "epoch": 10.43,
664
- "learning_rate": 0.00015834782608695652,
665
- "loss": 0.5141,
666
  "step": 960
667
  },
668
  {
669
  "epoch": 10.54,
670
- "learning_rate": 0.00015791304347826087,
671
- "loss": 0.5455,
672
  "step": 970
673
  },
674
  {
675
  "epoch": 10.65,
676
- "learning_rate": 0.00015747826086956522,
677
- "loss": 0.3143,
678
  "step": 980
679
  },
680
  {
681
  "epoch": 10.76,
682
- "learning_rate": 0.00015704347826086957,
683
- "loss": 0.4953,
684
  "step": 990
685
  },
686
  {
687
  "epoch": 10.87,
688
- "learning_rate": 0.00015660869565217394,
689
- "loss": 0.3366,
690
  "step": 1000
691
  },
692
  {
693
  "epoch": 10.87,
694
- "eval_accuracy": 0.9423076923076923,
695
- "eval_loss": 0.20851510763168335,
696
- "eval_runtime": 1.8573,
697
- "eval_samples_per_second": 139.987,
698
- "eval_steps_per_second": 17.768,
699
  "step": 1000
700
  },
701
  {
702
  "epoch": 10.98,
703
- "learning_rate": 0.00015617391304347826,
704
- "loss": 0.4971,
705
  "step": 1010
706
  },
707
  {
708
  "epoch": 11.09,
709
- "learning_rate": 0.0001557391304347826,
710
- "loss": 0.2471,
711
  "step": 1020
712
  },
713
  {
714
  "epoch": 11.2,
715
- "learning_rate": 0.00015530434782608696,
716
- "loss": 0.2852,
717
  "step": 1030
718
  },
719
  {
720
  "epoch": 11.3,
721
- "learning_rate": 0.0001548695652173913,
722
- "loss": 0.3441,
723
  "step": 1040
724
  },
725
  {
726
  "epoch": 11.41,
727
- "learning_rate": 0.00015443478260869565,
728
- "loss": 0.3399,
729
  "step": 1050
730
  },
731
  {
732
  "epoch": 11.52,
733
- "learning_rate": 0.000154,
734
- "loss": 0.6415,
735
  "step": 1060
736
  },
737
  {
738
  "epoch": 11.63,
739
- "learning_rate": 0.00015356521739130435,
740
- "loss": 0.4671,
741
  "step": 1070
742
  },
743
  {
744
  "epoch": 11.74,
745
- "learning_rate": 0.0001531304347826087,
746
- "loss": 0.3037,
747
  "step": 1080
748
  },
749
  {
750
  "epoch": 11.85,
751
- "learning_rate": 0.00015273913043478262,
752
- "loss": 0.2522,
753
  "step": 1090
754
  },
755
  {
756
  "epoch": 11.96,
757
- "learning_rate": 0.00015230434782608697,
758
- "loss": 0.4313,
759
  "step": 1100
760
  },
761
  {
762
  "epoch": 11.96,
763
- "eval_accuracy": 0.9653846153846154,
764
- "eval_loss": 0.19381502270698547,
765
- "eval_runtime": 2.4428,
766
- "eval_samples_per_second": 106.434,
767
- "eval_steps_per_second": 13.509,
768
  "step": 1100
769
  },
770
  {
771
  "epoch": 12.07,
772
- "learning_rate": 0.00015186956521739132,
773
- "loss": 0.4242,
774
  "step": 1110
775
  },
776
  {
777
  "epoch": 12.17,
778
- "learning_rate": 0.00015143478260869566,
779
- "loss": 0.3869,
780
  "step": 1120
781
  },
782
  {
783
  "epoch": 12.28,
784
- "learning_rate": 0.000151,
785
- "loss": 0.3848,
786
  "step": 1130
787
  },
788
  {
789
  "epoch": 12.39,
790
- "learning_rate": 0.00015056521739130436,
791
- "loss": 0.3096,
792
  "step": 1140
793
  },
794
  {
795
  "epoch": 12.5,
796
- "learning_rate": 0.0001501304347826087,
797
- "loss": 0.3504,
798
  "step": 1150
799
  },
800
  {
801
  "epoch": 12.61,
802
- "learning_rate": 0.00014969565217391305,
803
- "loss": 0.4618,
804
  "step": 1160
805
  },
806
  {
807
  "epoch": 12.72,
808
- "learning_rate": 0.0001492608695652174,
809
- "loss": 0.422,
810
  "step": 1170
811
  },
812
  {
813
  "epoch": 12.83,
814
- "learning_rate": 0.00014882608695652175,
815
- "loss": 0.4747,
816
  "step": 1180
817
  },
818
  {
819
  "epoch": 12.93,
820
- "learning_rate": 0.0001483913043478261,
821
- "loss": 0.4213,
822
  "step": 1190
823
  },
824
  {
825
  "epoch": 13.04,
826
- "learning_rate": 0.00014795652173913045,
827
- "loss": 0.4708,
828
  "step": 1200
829
  },
830
  {
831
  "epoch": 13.04,
832
- "eval_accuracy": 0.9615384615384616,
833
- "eval_loss": 0.09830717742443085,
834
- "eval_runtime": 1.8063,
835
- "eval_samples_per_second": 143.943,
836
- "eval_steps_per_second": 18.27,
837
  "step": 1200
838
  },
839
  {
840
  "epoch": 13.15,
841
- "learning_rate": 0.0001475217391304348,
842
- "loss": 0.4838,
843
  "step": 1210
844
  },
845
  {
846
  "epoch": 13.26,
847
- "learning_rate": 0.00014708695652173914,
848
- "loss": 0.2553,
849
  "step": 1220
850
  },
851
  {
852
  "epoch": 13.37,
853
- "learning_rate": 0.0001466521739130435,
854
- "loss": 0.4845,
855
  "step": 1230
856
  },
857
  {
858
  "epoch": 13.48,
859
- "learning_rate": 0.00014621739130434784,
860
- "loss": 0.392,
861
  "step": 1240
862
  },
863
  {
864
  "epoch": 13.59,
865
- "learning_rate": 0.00014578260869565219,
866
- "loss": 0.2566,
867
  "step": 1250
868
  },
869
  {
870
  "epoch": 13.7,
871
- "learning_rate": 0.0001453478260869565,
872
- "loss": 0.3421,
873
  "step": 1260
874
  },
875
  {
876
  "epoch": 13.8,
877
- "learning_rate": 0.00014491304347826088,
878
- "loss": 0.4546,
879
  "step": 1270
880
  },
881
  {
882
  "epoch": 13.91,
883
- "learning_rate": 0.00014447826086956523,
884
- "loss": 0.3361,
885
  "step": 1280
886
  },
887
  {
888
  "epoch": 14.02,
889
- "learning_rate": 0.00014404347826086958,
890
- "loss": 0.3381,
891
  "step": 1290
892
  },
893
  {
894
  "epoch": 14.13,
895
- "learning_rate": 0.00014360869565217392,
896
- "loss": 0.5222,
897
  "step": 1300
898
  },
899
  {
900
  "epoch": 14.13,
901
- "eval_accuracy": 0.9384615384615385,
902
- "eval_loss": 0.2471397966146469,
903
- "eval_runtime": 2.6705,
904
- "eval_samples_per_second": 97.36,
905
- "eval_steps_per_second": 12.357,
906
  "step": 1300
907
  },
908
  {
909
  "epoch": 14.24,
910
- "learning_rate": 0.00014317391304347827,
911
- "loss": 0.358,
912
  "step": 1310
913
  },
914
  {
915
  "epoch": 14.35,
916
- "learning_rate": 0.00014273913043478262,
917
- "loss": 0.5186,
918
  "step": 1320
919
  },
920
  {
921
  "epoch": 14.46,
922
- "learning_rate": 0.00014230434782608697,
923
- "loss": 0.3595,
924
  "step": 1330
925
  },
926
  {
927
  "epoch": 14.57,
928
- "learning_rate": 0.00014186956521739132,
929
- "loss": 0.2905,
930
  "step": 1340
931
  },
932
  {
933
  "epoch": 14.67,
934
- "learning_rate": 0.00014143478260869564,
935
- "loss": 0.3964,
936
  "step": 1350
937
  },
938
  {
939
  "epoch": 14.78,
940
- "learning_rate": 0.000141,
941
- "loss": 0.4401,
942
  "step": 1360
943
  },
944
  {
945
  "epoch": 14.89,
946
- "learning_rate": 0.00014056521739130436,
947
- "loss": 0.4114,
948
  "step": 1370
949
  },
950
  {
951
  "epoch": 15.0,
952
- "learning_rate": 0.0001401304347826087,
953
- "loss": 0.3029,
954
  "step": 1380
955
  },
956
  {
957
  "epoch": 15.11,
958
- "learning_rate": 0.00013969565217391306,
959
- "loss": 0.2695,
960
  "step": 1390
961
  },
962
  {
963
  "epoch": 15.22,
964
- "learning_rate": 0.0001392608695652174,
965
- "loss": 0.4582,
966
  "step": 1400
967
  },
968
  {
969
  "epoch": 15.22,
970
- "eval_accuracy": 0.9807692307692307,
971
- "eval_loss": 0.09266785532236099,
972
- "eval_runtime": 1.8003,
973
- "eval_samples_per_second": 144.423,
974
- "eval_steps_per_second": 18.331,
975
  "step": 1400
976
  },
977
  {
978
  "epoch": 15.33,
979
- "learning_rate": 0.00013882608695652175,
980
- "loss": 0.3313,
981
  "step": 1410
982
  },
983
  {
984
  "epoch": 15.43,
985
- "learning_rate": 0.0001383913043478261,
986
- "loss": 0.2894,
987
  "step": 1420
988
  },
989
  {
990
  "epoch": 15.54,
991
- "learning_rate": 0.00013795652173913045,
992
- "loss": 0.3195,
993
  "step": 1430
994
  },
995
  {
996
  "epoch": 15.65,
997
- "learning_rate": 0.00013752173913043477,
998
- "loss": 0.3371,
999
  "step": 1440
1000
  },
1001
  {
1002
  "epoch": 15.76,
1003
- "learning_rate": 0.00013708695652173914,
1004
- "loss": 0.5094,
1005
  "step": 1450
1006
  },
1007
  {
1008
  "epoch": 15.87,
1009
- "learning_rate": 0.0001366521739130435,
1010
- "loss": 0.4538,
1011
  "step": 1460
1012
  },
1013
  {
1014
  "epoch": 15.98,
1015
- "learning_rate": 0.00013621739130434784,
1016
- "loss": 0.3469,
1017
  "step": 1470
1018
  },
1019
  {
1020
  "epoch": 16.09,
1021
- "learning_rate": 0.00013578260869565219,
1022
- "loss": 0.3314,
1023
  "step": 1480
1024
  },
1025
  {
1026
  "epoch": 16.2,
1027
- "learning_rate": 0.00013534782608695653,
1028
- "loss": 0.2637,
1029
  "step": 1490
1030
  },
1031
  {
1032
  "epoch": 16.3,
1033
- "learning_rate": 0.00013491304347826088,
1034
- "loss": 0.3018,
1035
  "step": 1500
1036
  },
1037
  {
1038
  "epoch": 16.3,
1039
- "eval_accuracy": 0.9384615384615385,
1040
- "eval_loss": 0.2099553942680359,
1041
- "eval_runtime": 2.662,
1042
- "eval_samples_per_second": 97.67,
1043
- "eval_steps_per_second": 12.397,
1044
  "step": 1500
1045
  },
1046
  {
1047
  "epoch": 16.41,
1048
- "learning_rate": 0.00013447826086956523,
1049
- "loss": 0.2024,
1050
  "step": 1510
1051
  },
1052
  {
1053
  "epoch": 16.52,
1054
- "learning_rate": 0.00013404347826086958,
1055
- "loss": 0.3111,
1056
  "step": 1520
1057
  },
1058
  {
1059
  "epoch": 16.63,
1060
- "learning_rate": 0.0001336086956521739,
1061
- "loss": 0.4285,
1062
  "step": 1530
1063
  },
1064
  {
1065
  "epoch": 16.74,
1066
- "learning_rate": 0.00013317391304347825,
1067
- "loss": 0.5108,
1068
  "step": 1540
1069
  },
1070
  {
1071
  "epoch": 16.85,
1072
- "learning_rate": 0.00013273913043478262,
1073
- "loss": 0.3297,
1074
  "step": 1550
1075
  },
1076
  {
1077
  "epoch": 16.96,
1078
- "learning_rate": 0.00013230434782608697,
1079
- "loss": 0.3104,
1080
  "step": 1560
1081
  },
1082
  {
1083
  "epoch": 17.07,
1084
- "learning_rate": 0.00013186956521739132,
1085
- "loss": 0.2415,
1086
  "step": 1570
1087
  },
1088
  {
1089
  "epoch": 17.17,
1090
- "learning_rate": 0.00013143478260869567,
1091
- "loss": 0.2966,
1092
  "step": 1580
1093
  },
1094
  {
1095
  "epoch": 17.28,
1096
- "learning_rate": 0.000131,
1097
- "loss": 0.332,
1098
  "step": 1590
1099
  },
1100
  {
1101
  "epoch": 17.39,
1102
- "learning_rate": 0.00013056521739130436,
1103
- "loss": 0.3666,
1104
  "step": 1600
1105
  },
1106
  {
1107
  "epoch": 17.39,
1108
- "eval_accuracy": 0.95,
1109
- "eval_loss": 0.268606036901474,
1110
- "eval_runtime": 1.8148,
1111
- "eval_samples_per_second": 143.265,
1112
- "eval_steps_per_second": 18.184,
1113
  "step": 1600
1114
  },
1115
  {
1116
  "epoch": 17.5,
1117
- "learning_rate": 0.0001301304347826087,
1118
- "loss": 0.2901,
1119
  "step": 1610
1120
  },
1121
  {
1122
  "epoch": 17.61,
1123
- "learning_rate": 0.00012969565217391303,
1124
- "loss": 0.3256,
1125
  "step": 1620
1126
  },
1127
  {
1128
  "epoch": 17.72,
1129
- "learning_rate": 0.00012926086956521738,
1130
- "loss": 0.3876,
1131
  "step": 1630
1132
  },
1133
  {
1134
  "epoch": 17.83,
1135
- "learning_rate": 0.00012882608695652175,
1136
- "loss": 0.248,
1137
  "step": 1640
1138
  },
1139
  {
1140
  "epoch": 17.93,
1141
- "learning_rate": 0.0001283913043478261,
1142
- "loss": 0.328,
1143
  "step": 1650
1144
  },
1145
  {
1146
  "epoch": 18.04,
1147
- "learning_rate": 0.00012795652173913045,
1148
- "loss": 0.1971,
1149
  "step": 1660
1150
  },
1151
  {
1152
  "epoch": 18.15,
1153
- "learning_rate": 0.0001275217391304348,
1154
- "loss": 0.2928,
1155
  "step": 1670
1156
  },
1157
  {
1158
  "epoch": 18.26,
1159
- "learning_rate": 0.00012708695652173914,
1160
- "loss": 0.2983,
1161
  "step": 1680
1162
  },
1163
  {
1164
  "epoch": 18.37,
1165
- "learning_rate": 0.0001266521739130435,
1166
- "loss": 0.2786,
1167
  "step": 1690
1168
  },
1169
  {
1170
  "epoch": 18.48,
1171
- "learning_rate": 0.00012621739130434784,
1172
- "loss": 0.2042,
1173
  "step": 1700
1174
  },
1175
  {
1176
  "epoch": 18.48,
1177
- "eval_accuracy": 0.95,
1178
- "eval_loss": 0.2468329817056656,
1179
- "eval_runtime": 2.6658,
1180
- "eval_samples_per_second": 97.532,
1181
- "eval_steps_per_second": 12.379,
1182
  "step": 1700
1183
  },
1184
  {
1185
  "epoch": 18.59,
1186
- "learning_rate": 0.00012578260869565216,
1187
- "loss": 0.393,
1188
  "step": 1710
1189
  },
1190
  {
1191
  "epoch": 18.7,
1192
- "learning_rate": 0.0001253478260869565,
1193
- "loss": 0.2358,
1194
  "step": 1720
1195
  },
1196
  {
1197
  "epoch": 18.8,
1198
- "learning_rate": 0.00012491304347826088,
1199
- "loss": 0.2882,
1200
  "step": 1730
1201
  },
1202
  {
1203
  "epoch": 18.91,
1204
- "learning_rate": 0.00012447826086956523,
1205
- "loss": 0.2829,
1206
  "step": 1740
1207
  },
1208
  {
1209
  "epoch": 19.02,
1210
- "learning_rate": 0.00012404347826086958,
1211
- "loss": 0.2133,
1212
  "step": 1750
1213
  },
1214
  {
1215
  "epoch": 19.13,
1216
- "learning_rate": 0.00012360869565217393,
1217
- "loss": 0.4416,
1218
  "step": 1760
1219
  },
1220
  {
1221
  "epoch": 19.24,
1222
- "learning_rate": 0.00012317391304347827,
1223
- "loss": 0.2759,
1224
  "step": 1770
1225
  },
1226
  {
1227
  "epoch": 19.35,
1228
- "learning_rate": 0.00012273913043478262,
1229
- "loss": 0.2461,
1230
  "step": 1780
1231
  },
1232
  {
1233
  "epoch": 19.46,
1234
- "learning_rate": 0.00012230434782608694,
1235
- "loss": 0.2832,
1236
  "step": 1790
1237
  },
1238
  {
1239
  "epoch": 19.57,
1240
- "learning_rate": 0.0001218695652173913,
1241
- "loss": 0.4278,
1242
  "step": 1800
1243
  },
1244
  {
1245
  "epoch": 19.57,
1246
- "eval_accuracy": 0.9692307692307692,
1247
- "eval_loss": 0.1991642415523529,
1248
- "eval_runtime": 1.8414,
1249
- "eval_samples_per_second": 141.198,
1250
- "eval_steps_per_second": 17.921,
1251
  "step": 1800
1252
  },
1253
  {
1254
  "epoch": 19.67,
1255
- "learning_rate": 0.00012143478260869565,
1256
- "loss": 0.311,
1257
  "step": 1810
1258
  },
1259
  {
1260
  "epoch": 19.78,
1261
- "learning_rate": 0.000121,
1262
- "loss": 0.2397,
1263
  "step": 1820
1264
  },
1265
  {
1266
  "epoch": 19.89,
1267
- "learning_rate": 0.00012056521739130435,
1268
- "loss": 0.2212,
1269
  "step": 1830
1270
  },
1271
  {
1272
  "epoch": 20.0,
1273
- "learning_rate": 0.00012013043478260871,
1274
- "loss": 0.2397,
1275
  "step": 1840
1276
  },
1277
  {
1278
- "epoch": 20.11,
1279
- "learning_rate": 0.00011969565217391306,
1280
- "loss": 0.4092,
1281
- "step": 1850
1282
- },
1283
- {
1284
- "epoch": 20.22,
1285
- "learning_rate": 0.0001192608695652174,
1286
- "loss": 0.2989,
1287
- "step": 1860
1288
- },
1289
- {
1290
- "epoch": 20.33,
1291
- "learning_rate": 0.00011882608695652175,
1292
- "loss": 0.4813,
1293
- "step": 1870
1294
- },
1295
- {
1296
- "epoch": 20.43,
1297
- "learning_rate": 0.00011839130434782609,
1298
- "loss": 0.0967,
1299
- "step": 1880
1300
- },
1301
- {
1302
- "epoch": 20.54,
1303
- "learning_rate": 0.00011795652173913044,
1304
- "loss": 0.2902,
1305
- "step": 1890
1306
- },
1307
- {
1308
- "epoch": 20.65,
1309
- "learning_rate": 0.00011752173913043478,
1310
- "loss": 0.2769,
1311
- "step": 1900
1312
- },
1313
- {
1314
- "epoch": 20.65,
1315
- "eval_accuracy": 0.95,
1316
- "eval_loss": 0.1938965767621994,
1317
- "eval_runtime": 2.5866,
1318
- "eval_samples_per_second": 100.519,
1319
- "eval_steps_per_second": 12.758,
1320
- "step": 1900
1321
- },
1322
- {
1323
- "epoch": 20.76,
1324
- "learning_rate": 0.00011708695652173913,
1325
- "loss": 0.443,
1326
- "step": 1910
1327
- },
1328
- {
1329
- "epoch": 20.87,
1330
- "learning_rate": 0.00011665217391304348,
1331
- "loss": 0.2882,
1332
- "step": 1920
1333
- },
1334
- {
1335
- "epoch": 20.98,
1336
- "learning_rate": 0.00011621739130434783,
1337
- "loss": 0.3616,
1338
- "step": 1930
1339
- },
1340
- {
1341
- "epoch": 21.09,
1342
- "learning_rate": 0.00011578260869565219,
1343
- "loss": 0.2436,
1344
- "step": 1940
1345
- },
1346
- {
1347
- "epoch": 21.2,
1348
- "learning_rate": 0.00011534782608695654,
1349
- "loss": 0.3428,
1350
- "step": 1950
1351
- },
1352
- {
1353
- "epoch": 21.3,
1354
- "learning_rate": 0.00011491304347826088,
1355
- "loss": 0.2317,
1356
- "step": 1960
1357
- },
1358
- {
1359
- "epoch": 21.41,
1360
- "learning_rate": 0.00011447826086956522,
1361
- "loss": 0.1838,
1362
- "step": 1970
1363
- },
1364
- {
1365
- "epoch": 21.52,
1366
- "learning_rate": 0.00011404347826086957,
1367
- "loss": 0.3383,
1368
- "step": 1980
1369
- },
1370
- {
1371
- "epoch": 21.63,
1372
- "learning_rate": 0.00011360869565217391,
1373
- "loss": 0.3384,
1374
- "step": 1990
1375
- },
1376
- {
1377
- "epoch": 21.74,
1378
- "learning_rate": 0.00011317391304347826,
1379
- "loss": 0.2471,
1380
- "step": 2000
1381
- },
1382
- {
1383
- "epoch": 21.74,
1384
- "eval_accuracy": 0.95,
1385
- "eval_loss": 0.17909857630729675,
1386
- "eval_runtime": 1.8343,
1387
- "eval_samples_per_second": 141.746,
1388
- "eval_steps_per_second": 17.991,
1389
- "step": 2000
1390
- },
1391
- {
1392
- "epoch": 21.85,
1393
- "learning_rate": 0.00011273913043478261,
1394
- "loss": 0.2243,
1395
- "step": 2010
1396
- },
1397
- {
1398
- "epoch": 21.96,
1399
- "learning_rate": 0.00011230434782608696,
1400
- "loss": 0.2756,
1401
- "step": 2020
1402
- },
1403
- {
1404
- "epoch": 22.07,
1405
- "learning_rate": 0.00011186956521739132,
1406
- "loss": 0.2575,
1407
- "step": 2030
1408
- },
1409
- {
1410
- "epoch": 22.17,
1411
- "learning_rate": 0.00011143478260869567,
1412
- "loss": 0.3987,
1413
- "step": 2040
1414
- },
1415
- {
1416
- "epoch": 22.28,
1417
- "learning_rate": 0.00011100000000000001,
1418
- "loss": 0.2974,
1419
- "step": 2050
1420
- },
1421
- {
1422
- "epoch": 22.39,
1423
- "learning_rate": 0.00011056521739130435,
1424
- "loss": 0.2818,
1425
- "step": 2060
1426
- },
1427
- {
1428
- "epoch": 22.5,
1429
- "learning_rate": 0.0001101304347826087,
1430
- "loss": 0.1453,
1431
- "step": 2070
1432
- },
1433
- {
1434
- "epoch": 22.61,
1435
- "learning_rate": 0.00010969565217391304,
1436
- "loss": 0.2972,
1437
- "step": 2080
1438
- },
1439
- {
1440
- "epoch": 22.72,
1441
- "learning_rate": 0.00010926086956521739,
1442
- "loss": 0.1785,
1443
- "step": 2090
1444
- },
1445
- {
1446
- "epoch": 22.83,
1447
- "learning_rate": 0.00010882608695652174,
1448
- "loss": 0.3054,
1449
- "step": 2100
1450
- },
1451
- {
1452
- "epoch": 22.83,
1453
- "eval_accuracy": 0.9615384615384616,
1454
- "eval_loss": 0.18149061501026154,
1455
- "eval_runtime": 2.3493,
1456
- "eval_samples_per_second": 110.67,
1457
- "eval_steps_per_second": 14.047,
1458
- "step": 2100
1459
- },
1460
- {
1461
- "epoch": 22.93,
1462
- "learning_rate": 0.00010839130434782609,
1463
- "loss": 0.2999,
1464
- "step": 2110
1465
- },
1466
- {
1467
- "epoch": 23.04,
1468
- "learning_rate": 0.00010795652173913045,
1469
- "loss": 0.1633,
1470
- "step": 2120
1471
- },
1472
- {
1473
- "epoch": 23.15,
1474
- "learning_rate": 0.0001075217391304348,
1475
- "loss": 0.3255,
1476
- "step": 2130
1477
- },
1478
- {
1479
- "epoch": 23.26,
1480
- "learning_rate": 0.00010708695652173915,
1481
- "loss": 0.2453,
1482
- "step": 2140
1483
- },
1484
- {
1485
- "epoch": 23.37,
1486
- "learning_rate": 0.00010665217391304348,
1487
- "loss": 0.4836,
1488
- "step": 2150
1489
- },
1490
- {
1491
- "epoch": 23.48,
1492
- "learning_rate": 0.00010621739130434783,
1493
- "loss": 0.1796,
1494
- "step": 2160
1495
- },
1496
- {
1497
- "epoch": 23.59,
1498
- "learning_rate": 0.00010578260869565218,
1499
- "loss": 0.1229,
1500
- "step": 2170
1501
- },
1502
- {
1503
- "epoch": 23.7,
1504
- "learning_rate": 0.00010534782608695652,
1505
- "loss": 0.4153,
1506
- "step": 2180
1507
- },
1508
- {
1509
- "epoch": 23.8,
1510
- "learning_rate": 0.00010491304347826087,
1511
- "loss": 0.2937,
1512
- "step": 2190
1513
- },
1514
- {
1515
- "epoch": 23.91,
1516
- "learning_rate": 0.00010447826086956522,
1517
- "loss": 0.4803,
1518
- "step": 2200
1519
- },
1520
- {
1521
- "epoch": 23.91,
1522
- "eval_accuracy": 0.95,
1523
- "eval_loss": 0.23987938463687897,
1524
- "eval_runtime": 1.8204,
1525
- "eval_samples_per_second": 142.824,
1526
- "eval_steps_per_second": 18.128,
1527
- "step": 2200
1528
- },
1529
- {
1530
- "epoch": 24.02,
1531
- "learning_rate": 0.00010404347826086958,
1532
- "loss": 0.1835,
1533
- "step": 2210
1534
- },
1535
- {
1536
- "epoch": 24.13,
1537
- "learning_rate": 0.00010360869565217393,
1538
- "loss": 0.2622,
1539
- "step": 2220
1540
- },
1541
- {
1542
- "epoch": 24.24,
1543
- "learning_rate": 0.00010317391304347828,
1544
- "loss": 0.2113,
1545
- "step": 2230
1546
- },
1547
- {
1548
- "epoch": 24.35,
1549
- "learning_rate": 0.00010273913043478261,
1550
- "loss": 0.1929,
1551
- "step": 2240
1552
- },
1553
- {
1554
- "epoch": 24.46,
1555
- "learning_rate": 0.00010230434782608696,
1556
- "loss": 0.483,
1557
- "step": 2250
1558
- },
1559
- {
1560
- "epoch": 24.57,
1561
- "learning_rate": 0.0001018695652173913,
1562
- "loss": 0.1994,
1563
- "step": 2260
1564
- },
1565
- {
1566
- "epoch": 24.67,
1567
- "learning_rate": 0.00010143478260869565,
1568
- "loss": 0.3179,
1569
- "step": 2270
1570
- },
1571
- {
1572
- "epoch": 24.78,
1573
- "learning_rate": 0.000101,
1574
- "loss": 0.2537,
1575
- "step": 2280
1576
- },
1577
- {
1578
- "epoch": 24.89,
1579
- "learning_rate": 0.00010056521739130435,
1580
- "loss": 0.3047,
1581
- "step": 2290
1582
- },
1583
- {
1584
- "epoch": 25.0,
1585
- "learning_rate": 0.00010013043478260871,
1586
- "loss": 0.218,
1587
- "step": 2300
1588
- },
1589
- {
1590
- "epoch": 25.0,
1591
- "eval_accuracy": 0.9461538461538461,
1592
- "eval_loss": 0.28188276290893555,
1593
- "eval_runtime": 2.2046,
1594
- "eval_samples_per_second": 117.936,
1595
- "eval_steps_per_second": 14.969,
1596
- "step": 2300
1597
- },
1598
- {
1599
- "epoch": 25.11,
1600
- "learning_rate": 9.969565217391305e-05,
1601
- "loss": 0.1038,
1602
- "step": 2310
1603
- },
1604
- {
1605
- "epoch": 25.22,
1606
- "learning_rate": 9.92608695652174e-05,
1607
- "loss": 0.2629,
1608
- "step": 2320
1609
- },
1610
- {
1611
- "epoch": 25.33,
1612
- "learning_rate": 9.882608695652174e-05,
1613
- "loss": 0.2938,
1614
- "step": 2330
1615
- },
1616
- {
1617
- "epoch": 25.43,
1618
- "learning_rate": 9.83913043478261e-05,
1619
- "loss": 0.2453,
1620
- "step": 2340
1621
- },
1622
- {
1623
- "epoch": 25.54,
1624
- "learning_rate": 9.795652173913044e-05,
1625
- "loss": 0.2449,
1626
- "step": 2350
1627
- },
1628
- {
1629
- "epoch": 25.65,
1630
- "learning_rate": 9.752173913043478e-05,
1631
- "loss": 0.355,
1632
- "step": 2360
1633
- },
1634
- {
1635
- "epoch": 25.76,
1636
- "learning_rate": 9.708695652173913e-05,
1637
- "loss": 0.199,
1638
- "step": 2370
1639
- },
1640
- {
1641
- "epoch": 25.87,
1642
- "learning_rate": 9.665217391304348e-05,
1643
- "loss": 0.3433,
1644
- "step": 2380
1645
- },
1646
- {
1647
- "epoch": 25.98,
1648
- "learning_rate": 9.621739130434783e-05,
1649
- "loss": 0.2047,
1650
- "step": 2390
1651
- },
1652
- {
1653
- "epoch": 26.09,
1654
- "learning_rate": 9.578260869565218e-05,
1655
- "loss": 0.1858,
1656
- "step": 2400
1657
- },
1658
- {
1659
- "epoch": 26.09,
1660
- "eval_accuracy": 0.9769230769230769,
1661
- "eval_loss": 0.20682699978351593,
1662
- "eval_runtime": 1.9198,
1663
- "eval_samples_per_second": 135.43,
1664
- "eval_steps_per_second": 17.189,
1665
- "step": 2400
1666
- },
1667
- {
1668
- "epoch": 26.2,
1669
- "learning_rate": 9.534782608695652e-05,
1670
- "loss": 0.2261,
1671
- "step": 2410
1672
- },
1673
- {
1674
- "epoch": 26.3,
1675
- "learning_rate": 9.491304347826087e-05,
1676
- "loss": 0.2663,
1677
- "step": 2420
1678
- },
1679
- {
1680
- "epoch": 26.41,
1681
- "learning_rate": 9.447826086956523e-05,
1682
- "loss": 0.209,
1683
- "step": 2430
1684
- },
1685
- {
1686
- "epoch": 26.52,
1687
- "learning_rate": 9.404347826086957e-05,
1688
- "loss": 0.1664,
1689
- "step": 2440
1690
- },
1691
- {
1692
- "epoch": 26.63,
1693
- "learning_rate": 9.360869565217392e-05,
1694
- "loss": 0.2993,
1695
- "step": 2450
1696
- },
1697
- {
1698
- "epoch": 26.74,
1699
- "learning_rate": 9.317391304347826e-05,
1700
- "loss": 0.1812,
1701
- "step": 2460
1702
- },
1703
- {
1704
- "epoch": 26.85,
1705
- "learning_rate": 9.273913043478261e-05,
1706
- "loss": 0.3832,
1707
- "step": 2470
1708
- },
1709
- {
1710
- "epoch": 26.96,
1711
- "learning_rate": 9.230434782608696e-05,
1712
- "loss": 0.2746,
1713
- "step": 2480
1714
- },
1715
- {
1716
- "epoch": 27.07,
1717
- "learning_rate": 9.186956521739131e-05,
1718
- "loss": 0.2398,
1719
- "step": 2490
1720
- },
1721
- {
1722
- "epoch": 27.17,
1723
- "learning_rate": 9.143478260869566e-05,
1724
- "loss": 0.214,
1725
- "step": 2500
1726
- },
1727
- {
1728
- "epoch": 27.17,
1729
- "eval_accuracy": 0.9461538461538461,
1730
- "eval_loss": 0.2683180868625641,
1731
- "eval_runtime": 1.97,
1732
- "eval_samples_per_second": 131.981,
1733
- "eval_steps_per_second": 16.751,
1734
- "step": 2500
1735
- },
1736
- {
1737
- "epoch": 27.28,
1738
- "learning_rate": 9.1e-05,
1739
- "loss": 0.3322,
1740
- "step": 2510
1741
- },
1742
- {
1743
- "epoch": 27.39,
1744
- "learning_rate": 9.056521739130435e-05,
1745
- "loss": 0.1819,
1746
- "step": 2520
1747
- },
1748
- {
1749
- "epoch": 27.5,
1750
- "learning_rate": 9.01304347826087e-05,
1751
- "loss": 0.358,
1752
- "step": 2530
1753
- },
1754
- {
1755
- "epoch": 27.61,
1756
- "learning_rate": 8.969565217391305e-05,
1757
- "loss": 0.1992,
1758
- "step": 2540
1759
- },
1760
- {
1761
- "epoch": 27.72,
1762
- "learning_rate": 8.92608695652174e-05,
1763
- "loss": 0.2446,
1764
- "step": 2550
1765
- },
1766
- {
1767
- "epoch": 27.83,
1768
- "learning_rate": 8.882608695652174e-05,
1769
- "loss": 0.2713,
1770
- "step": 2560
1771
- },
1772
- {
1773
- "epoch": 27.93,
1774
- "learning_rate": 8.839130434782609e-05,
1775
- "loss": 0.1827,
1776
- "step": 2570
1777
- },
1778
- {
1779
- "epoch": 28.04,
1780
- "learning_rate": 8.795652173913044e-05,
1781
- "loss": 0.3405,
1782
- "step": 2580
1783
- },
1784
- {
1785
- "epoch": 28.15,
1786
- "learning_rate": 8.752173913043479e-05,
1787
- "loss": 0.3026,
1788
- "step": 2590
1789
- },
1790
- {
1791
- "epoch": 28.26,
1792
- "learning_rate": 8.708695652173913e-05,
1793
- "loss": 0.2532,
1794
- "step": 2600
1795
- },
1796
- {
1797
- "epoch": 28.26,
1798
- "eval_accuracy": 0.9615384615384616,
1799
- "eval_loss": 0.13641561567783356,
1800
- "eval_runtime": 1.8692,
1801
- "eval_samples_per_second": 139.096,
1802
- "eval_steps_per_second": 17.654,
1803
- "step": 2600
1804
- },
1805
- {
1806
- "epoch": 28.37,
1807
- "learning_rate": 8.665217391304348e-05,
1808
- "loss": 0.298,
1809
- "step": 2610
1810
- },
1811
- {
1812
- "epoch": 28.48,
1813
- "learning_rate": 8.621739130434783e-05,
1814
- "loss": 0.1697,
1815
- "step": 2620
1816
- },
1817
- {
1818
- "epoch": 28.59,
1819
- "learning_rate": 8.578260869565218e-05,
1820
- "loss": 0.1527,
1821
- "step": 2630
1822
- },
1823
- {
1824
- "epoch": 28.7,
1825
- "learning_rate": 8.534782608695653e-05,
1826
- "loss": 0.1777,
1827
- "step": 2640
1828
- },
1829
- {
1830
- "epoch": 28.8,
1831
- "learning_rate": 8.491304347826087e-05,
1832
- "loss": 0.212,
1833
- "step": 2650
1834
- },
1835
- {
1836
- "epoch": 28.91,
1837
- "learning_rate": 8.447826086956522e-05,
1838
- "loss": 0.3394,
1839
- "step": 2660
1840
- },
1841
- {
1842
- "epoch": 29.02,
1843
- "learning_rate": 8.404347826086957e-05,
1844
- "loss": 0.1577,
1845
- "step": 2670
1846
- },
1847
- {
1848
- "epoch": 29.13,
1849
- "learning_rate": 8.360869565217392e-05,
1850
- "loss": 0.2502,
1851
- "step": 2680
1852
- },
1853
- {
1854
- "epoch": 29.24,
1855
- "learning_rate": 8.317391304347826e-05,
1856
- "loss": 0.2463,
1857
- "step": 2690
1858
- },
1859
- {
1860
- "epoch": 29.35,
1861
- "learning_rate": 8.273913043478261e-05,
1862
- "loss": 0.2021,
1863
- "step": 2700
1864
- },
1865
- {
1866
- "epoch": 29.35,
1867
- "eval_accuracy": 0.9692307692307692,
1868
- "eval_loss": 0.15067297220230103,
1869
- "eval_runtime": 1.8314,
1870
- "eval_samples_per_second": 141.965,
1871
- "eval_steps_per_second": 18.019,
1872
- "step": 2700
1873
- },
1874
- {
1875
- "epoch": 29.46,
1876
- "learning_rate": 8.230434782608696e-05,
1877
- "loss": 0.204,
1878
- "step": 2710
1879
- },
1880
- {
1881
- "epoch": 29.57,
1882
- "learning_rate": 8.186956521739131e-05,
1883
- "loss": 0.2408,
1884
- "step": 2720
1885
- },
1886
- {
1887
- "epoch": 29.67,
1888
- "learning_rate": 8.143478260869566e-05,
1889
- "loss": 0.1918,
1890
- "step": 2730
1891
- },
1892
- {
1893
- "epoch": 29.78,
1894
- "learning_rate": 8.1e-05,
1895
- "loss": 0.1922,
1896
- "step": 2740
1897
- },
1898
- {
1899
- "epoch": 29.89,
1900
- "learning_rate": 8.056521739130435e-05,
1901
- "loss": 0.1565,
1902
- "step": 2750
1903
- },
1904
- {
1905
- "epoch": 30.0,
1906
- "learning_rate": 8.01304347826087e-05,
1907
- "loss": 0.1015,
1908
- "step": 2760
1909
- },
1910
- {
1911
- "epoch": 30.11,
1912
- "learning_rate": 7.969565217391305e-05,
1913
- "loss": 0.267,
1914
- "step": 2770
1915
- },
1916
- {
1917
- "epoch": 30.22,
1918
- "learning_rate": 7.92608695652174e-05,
1919
- "loss": 0.1925,
1920
- "step": 2780
1921
- },
1922
- {
1923
- "epoch": 30.33,
1924
- "learning_rate": 7.882608695652174e-05,
1925
- "loss": 0.189,
1926
- "step": 2790
1927
- },
1928
- {
1929
- "epoch": 30.43,
1930
- "learning_rate": 7.839130434782609e-05,
1931
- "loss": 0.1794,
1932
- "step": 2800
1933
- },
1934
- {
1935
- "epoch": 30.43,
1936
- "eval_accuracy": 0.9692307692307692,
1937
- "eval_loss": 0.14496548473834991,
1938
- "eval_runtime": 1.8418,
1939
- "eval_samples_per_second": 141.168,
1940
- "eval_steps_per_second": 17.917,
1941
- "step": 2800
1942
- },
1943
- {
1944
- "epoch": 30.54,
1945
- "learning_rate": 7.795652173913044e-05,
1946
- "loss": 0.2747,
1947
- "step": 2810
1948
- },
1949
- {
1950
- "epoch": 30.65,
1951
- "learning_rate": 7.752173913043479e-05,
1952
- "loss": 0.2668,
1953
- "step": 2820
1954
- },
1955
- {
1956
- "epoch": 30.76,
1957
- "learning_rate": 7.708695652173913e-05,
1958
- "loss": 0.4758,
1959
- "step": 2830
1960
- },
1961
- {
1962
- "epoch": 30.87,
1963
- "learning_rate": 7.665217391304348e-05,
1964
- "loss": 0.1839,
1965
- "step": 2840
1966
- },
1967
- {
1968
- "epoch": 30.98,
1969
- "learning_rate": 7.621739130434783e-05,
1970
- "loss": 0.3076,
1971
- "step": 2850
1972
- },
1973
- {
1974
- "epoch": 31.09,
1975
- "learning_rate": 7.578260869565218e-05,
1976
- "loss": 0.1099,
1977
- "step": 2860
1978
- },
1979
- {
1980
- "epoch": 31.2,
1981
- "learning_rate": 7.534782608695653e-05,
1982
- "loss": 0.2545,
1983
- "step": 2870
1984
- },
1985
- {
1986
- "epoch": 31.3,
1987
- "learning_rate": 7.491304347826087e-05,
1988
- "loss": 0.2875,
1989
- "step": 2880
1990
- },
1991
- {
1992
- "epoch": 31.41,
1993
- "learning_rate": 7.447826086956522e-05,
1994
- "loss": 0.183,
1995
- "step": 2890
1996
- },
1997
- {
1998
- "epoch": 31.52,
1999
- "learning_rate": 7.404347826086957e-05,
2000
- "loss": 0.1263,
2001
- "step": 2900
2002
- },
2003
- {
2004
- "epoch": 31.52,
2005
- "eval_accuracy": 0.9692307692307692,
2006
- "eval_loss": 0.15624746680259705,
2007
- "eval_runtime": 1.8016,
2008
- "eval_samples_per_second": 144.316,
2009
- "eval_steps_per_second": 18.317,
2010
- "step": 2900
2011
- },
2012
- {
2013
- "epoch": 31.63,
2014
- "learning_rate": 7.360869565217392e-05,
2015
- "loss": 0.2734,
2016
- "step": 2910
2017
- },
2018
- {
2019
- "epoch": 31.74,
2020
- "learning_rate": 7.317391304347827e-05,
2021
- "loss": 0.2466,
2022
- "step": 2920
2023
- },
2024
- {
2025
- "epoch": 31.85,
2026
- "learning_rate": 7.273913043478261e-05,
2027
- "loss": 0.2409,
2028
- "step": 2930
2029
- },
2030
- {
2031
- "epoch": 31.96,
2032
- "learning_rate": 7.230434782608696e-05,
2033
- "loss": 0.1569,
2034
- "step": 2940
2035
- },
2036
- {
2037
- "epoch": 32.07,
2038
- "learning_rate": 7.186956521739131e-05,
2039
- "loss": 0.1666,
2040
- "step": 2950
2041
- },
2042
- {
2043
- "epoch": 32.17,
2044
- "learning_rate": 7.143478260869566e-05,
2045
- "loss": 0.1791,
2046
- "step": 2960
2047
- },
2048
- {
2049
- "epoch": 32.28,
2050
- "learning_rate": 7.1e-05,
2051
- "loss": 0.2163,
2052
- "step": 2970
2053
- },
2054
- {
2055
- "epoch": 32.39,
2056
- "learning_rate": 7.056521739130435e-05,
2057
- "loss": 0.1386,
2058
- "step": 2980
2059
- },
2060
- {
2061
- "epoch": 32.5,
2062
- "learning_rate": 7.01304347826087e-05,
2063
- "loss": 0.2482,
2064
- "step": 2990
2065
- },
2066
- {
2067
- "epoch": 32.61,
2068
- "learning_rate": 6.969565217391305e-05,
2069
- "loss": 0.1298,
2070
- "step": 3000
2071
- },
2072
- {
2073
- "epoch": 32.61,
2074
- "eval_accuracy": 0.9807692307692307,
2075
- "eval_loss": 0.10777488350868225,
2076
- "eval_runtime": 1.8606,
2077
- "eval_samples_per_second": 139.737,
2078
- "eval_steps_per_second": 17.736,
2079
- "step": 3000
2080
- },
2081
- {
2082
- "epoch": 32.72,
2083
- "learning_rate": 6.92608695652174e-05,
2084
- "loss": 0.191,
2085
- "step": 3010
2086
- },
2087
- {
2088
- "epoch": 32.83,
2089
- "learning_rate": 6.882608695652174e-05,
2090
- "loss": 0.1915,
2091
- "step": 3020
2092
- },
2093
- {
2094
- "epoch": 32.93,
2095
- "learning_rate": 6.839130434782609e-05,
2096
- "loss": 0.3359,
2097
- "step": 3030
2098
- },
2099
- {
2100
- "epoch": 33.04,
2101
- "learning_rate": 6.795652173913044e-05,
2102
- "loss": 0.0779,
2103
- "step": 3040
2104
- },
2105
- {
2106
- "epoch": 33.15,
2107
- "learning_rate": 6.752173913043479e-05,
2108
- "loss": 0.2502,
2109
- "step": 3050
2110
- },
2111
- {
2112
- "epoch": 33.26,
2113
- "learning_rate": 6.708695652173914e-05,
2114
- "loss": 0.2406,
2115
- "step": 3060
2116
- },
2117
- {
2118
- "epoch": 33.37,
2119
- "learning_rate": 6.665217391304348e-05,
2120
- "loss": 0.2338,
2121
- "step": 3070
2122
- },
2123
- {
2124
- "epoch": 33.48,
2125
- "learning_rate": 6.621739130434783e-05,
2126
- "loss": 0.1917,
2127
- "step": 3080
2128
- },
2129
- {
2130
- "epoch": 33.59,
2131
- "learning_rate": 6.578260869565218e-05,
2132
- "loss": 0.2137,
2133
- "step": 3090
2134
- },
2135
- {
2136
- "epoch": 33.7,
2137
- "learning_rate": 6.534782608695653e-05,
2138
- "loss": 0.1883,
2139
- "step": 3100
2140
- },
2141
- {
2142
- "epoch": 33.7,
2143
- "eval_accuracy": 0.9846153846153847,
2144
- "eval_loss": 0.07627514749765396,
2145
- "eval_runtime": 1.8071,
2146
- "eval_samples_per_second": 143.881,
2147
- "eval_steps_per_second": 18.262,
2148
- "step": 3100
2149
- },
2150
- {
2151
- "epoch": 33.8,
2152
- "learning_rate": 6.491304347826087e-05,
2153
- "loss": 0.1658,
2154
- "step": 3110
2155
- },
2156
- {
2157
- "epoch": 33.91,
2158
- "learning_rate": 6.447826086956522e-05,
2159
- "loss": 0.1709,
2160
- "step": 3120
2161
- },
2162
- {
2163
- "epoch": 34.02,
2164
- "learning_rate": 6.404347826086957e-05,
2165
- "loss": 0.1934,
2166
- "step": 3130
2167
- },
2168
- {
2169
- "epoch": 34.13,
2170
- "learning_rate": 6.36086956521739e-05,
2171
- "loss": 0.1042,
2172
- "step": 3140
2173
- },
2174
- {
2175
- "epoch": 34.24,
2176
- "learning_rate": 6.317391304347827e-05,
2177
- "loss": 0.1438,
2178
- "step": 3150
2179
- },
2180
- {
2181
- "epoch": 34.35,
2182
- "learning_rate": 6.273913043478261e-05,
2183
- "loss": 0.1592,
2184
- "step": 3160
2185
- },
2186
- {
2187
- "epoch": 34.46,
2188
- "learning_rate": 6.230434782608696e-05,
2189
- "loss": 0.3535,
2190
- "step": 3170
2191
- },
2192
- {
2193
- "epoch": 34.57,
2194
- "learning_rate": 6.186956521739131e-05,
2195
- "loss": 0.1521,
2196
- "step": 3180
2197
- },
2198
- {
2199
- "epoch": 34.67,
2200
- "learning_rate": 6.143478260869566e-05,
2201
- "loss": 0.2928,
2202
- "step": 3190
2203
- },
2204
- {
2205
- "epoch": 34.78,
2206
- "learning_rate": 6.1e-05,
2207
- "loss": 0.1773,
2208
- "step": 3200
2209
- },
2210
- {
2211
- "epoch": 34.78,
2212
- "eval_accuracy": 0.9846153846153847,
2213
- "eval_loss": 0.07703894376754761,
2214
- "eval_runtime": 1.8178,
2215
- "eval_samples_per_second": 143.029,
2216
- "eval_steps_per_second": 18.154,
2217
- "step": 3200
2218
- },
2219
- {
2220
- "epoch": 34.89,
2221
- "learning_rate": 6.056521739130435e-05,
2222
- "loss": 0.1186,
2223
- "step": 3210
2224
- },
2225
- {
2226
- "epoch": 35.0,
2227
- "learning_rate": 6.01304347826087e-05,
2228
- "loss": 0.1795,
2229
- "step": 3220
2230
- },
2231
- {
2232
- "epoch": 35.11,
2233
- "learning_rate": 5.969565217391304e-05,
2234
- "loss": 0.1586,
2235
- "step": 3230
2236
- },
2237
- {
2238
- "epoch": 35.22,
2239
- "learning_rate": 5.926086956521739e-05,
2240
- "loss": 0.2032,
2241
- "step": 3240
2242
- },
2243
- {
2244
- "epoch": 35.33,
2245
- "learning_rate": 5.8826086956521745e-05,
2246
- "loss": 0.1276,
2247
- "step": 3250
2248
- },
2249
- {
2250
- "epoch": 35.43,
2251
- "learning_rate": 5.839130434782609e-05,
2252
- "loss": 0.1245,
2253
- "step": 3260
2254
- },
2255
- {
2256
- "epoch": 35.54,
2257
- "learning_rate": 5.795652173913044e-05,
2258
- "loss": 0.1988,
2259
- "step": 3270
2260
- },
2261
- {
2262
- "epoch": 35.65,
2263
- "learning_rate": 5.752173913043478e-05,
2264
- "loss": 0.0685,
2265
- "step": 3280
2266
- },
2267
- {
2268
- "epoch": 35.76,
2269
- "learning_rate": 5.708695652173913e-05,
2270
- "loss": 0.2522,
2271
- "step": 3290
2272
- },
2273
- {
2274
- "epoch": 35.87,
2275
- "learning_rate": 5.6652173913043484e-05,
2276
- "loss": 0.1491,
2277
- "step": 3300
2278
- },
2279
- {
2280
- "epoch": 35.87,
2281
- "eval_accuracy": 0.9846153846153847,
2282
- "eval_loss": 0.09560323506593704,
2283
- "eval_runtime": 1.8328,
2284
- "eval_samples_per_second": 141.861,
2285
- "eval_steps_per_second": 18.005,
2286
- "step": 3300
2287
- },
2288
- {
2289
- "epoch": 35.98,
2290
- "learning_rate": 5.621739130434783e-05,
2291
- "loss": 0.1288,
2292
- "step": 3310
2293
- },
2294
- {
2295
- "epoch": 36.09,
2296
- "learning_rate": 5.578260869565217e-05,
2297
- "loss": 0.2158,
2298
- "step": 3320
2299
- },
2300
- {
2301
- "epoch": 36.2,
2302
- "learning_rate": 5.534782608695652e-05,
2303
- "loss": 0.2777,
2304
- "step": 3330
2305
- },
2306
- {
2307
- "epoch": 36.3,
2308
- "learning_rate": 5.495652173913044e-05,
2309
- "loss": 0.1669,
2310
- "step": 3340
2311
- },
2312
- {
2313
- "epoch": 36.41,
2314
- "learning_rate": 5.452173913043479e-05,
2315
- "loss": 0.1559,
2316
- "step": 3350
2317
- },
2318
- {
2319
- "epoch": 36.52,
2320
- "learning_rate": 5.408695652173913e-05,
2321
- "loss": 0.3045,
2322
- "step": 3360
2323
- },
2324
- {
2325
- "epoch": 36.63,
2326
- "learning_rate": 5.365217391304348e-05,
2327
- "loss": 0.1495,
2328
- "step": 3370
2329
- },
2330
- {
2331
- "epoch": 36.74,
2332
- "learning_rate": 5.321739130434783e-05,
2333
- "loss": 0.0966,
2334
- "step": 3380
2335
- },
2336
- {
2337
- "epoch": 36.85,
2338
- "learning_rate": 5.278260869565218e-05,
2339
- "loss": 0.1606,
2340
- "step": 3390
2341
- },
2342
- {
2343
- "epoch": 36.96,
2344
- "learning_rate": 5.234782608695652e-05,
2345
- "loss": 0.1866,
2346
- "step": 3400
2347
- },
2348
- {
2349
- "epoch": 36.96,
2350
- "eval_accuracy": 0.9846153846153847,
2351
- "eval_loss": 0.11623835563659668,
2352
- "eval_runtime": 1.8358,
2353
- "eval_samples_per_second": 141.631,
2354
- "eval_steps_per_second": 17.976,
2355
- "step": 3400
2356
- },
2357
- {
2358
- "epoch": 37.07,
2359
- "learning_rate": 5.191304347826087e-05,
2360
- "loss": 0.1166,
2361
- "step": 3410
2362
- },
2363
- {
2364
- "epoch": 37.17,
2365
- "learning_rate": 5.147826086956522e-05,
2366
- "loss": 0.1632,
2367
- "step": 3420
2368
- },
2369
- {
2370
- "epoch": 37.28,
2371
- "learning_rate": 5.104347826086957e-05,
2372
- "loss": 0.3038,
2373
- "step": 3430
2374
- },
2375
- {
2376
- "epoch": 37.39,
2377
- "learning_rate": 5.060869565217392e-05,
2378
- "loss": 0.1259,
2379
- "step": 3440
2380
- },
2381
- {
2382
- "epoch": 37.5,
2383
- "learning_rate": 5.017391304347826e-05,
2384
- "loss": 0.1906,
2385
- "step": 3450
2386
- },
2387
- {
2388
- "epoch": 37.61,
2389
- "learning_rate": 4.973913043478261e-05,
2390
- "loss": 0.1809,
2391
- "step": 3460
2392
- },
2393
- {
2394
- "epoch": 37.72,
2395
- "learning_rate": 4.930434782608696e-05,
2396
- "loss": 0.1133,
2397
- "step": 3470
2398
- },
2399
- {
2400
- "epoch": 37.83,
2401
- "learning_rate": 4.8869565217391305e-05,
2402
- "loss": 0.2476,
2403
- "step": 3480
2404
- },
2405
- {
2406
- "epoch": 37.93,
2407
- "learning_rate": 4.843478260869565e-05,
2408
- "loss": 0.2042,
2409
- "step": 3490
2410
- },
2411
- {
2412
- "epoch": 38.04,
2413
- "learning_rate": 4.8e-05,
2414
- "loss": 0.1269,
2415
- "step": 3500
2416
- },
2417
- {
2418
- "epoch": 38.04,
2419
- "eval_accuracy": 0.9769230769230769,
2420
- "eval_loss": 0.13658243417739868,
2421
- "eval_runtime": 1.8465,
2422
- "eval_samples_per_second": 140.804,
2423
- "eval_steps_per_second": 17.871,
2424
- "step": 3500
2425
- },
2426
- {
2427
- "epoch": 38.15,
2428
- "learning_rate": 4.756521739130435e-05,
2429
- "loss": 0.244,
2430
- "step": 3510
2431
- },
2432
- {
2433
- "epoch": 38.26,
2434
- "learning_rate": 4.71304347826087e-05,
2435
- "loss": 0.1274,
2436
- "step": 3520
2437
- },
2438
- {
2439
- "epoch": 38.37,
2440
- "learning_rate": 4.6695652173913045e-05,
2441
- "loss": 0.175,
2442
- "step": 3530
2443
- },
2444
- {
2445
- "epoch": 38.48,
2446
- "learning_rate": 4.62608695652174e-05,
2447
- "loss": 0.2629,
2448
- "step": 3540
2449
- },
2450
- {
2451
- "epoch": 38.59,
2452
- "learning_rate": 4.582608695652174e-05,
2453
- "loss": 0.1863,
2454
- "step": 3550
2455
- },
2456
- {
2457
- "epoch": 38.7,
2458
- "learning_rate": 4.539130434782609e-05,
2459
- "loss": 0.1337,
2460
- "step": 3560
2461
- },
2462
- {
2463
- "epoch": 38.8,
2464
- "learning_rate": 4.4956521739130436e-05,
2465
- "loss": 0.1694,
2466
- "step": 3570
2467
- },
2468
- {
2469
- "epoch": 38.91,
2470
- "learning_rate": 4.4521739130434784e-05,
2471
- "loss": 0.1895,
2472
- "step": 3580
2473
- },
2474
- {
2475
- "epoch": 39.02,
2476
- "learning_rate": 4.408695652173913e-05,
2477
- "loss": 0.1817,
2478
- "step": 3590
2479
- },
2480
- {
2481
- "epoch": 39.13,
2482
- "learning_rate": 4.365217391304348e-05,
2483
- "loss": 0.1498,
2484
- "step": 3600
2485
- },
2486
- {
2487
- "epoch": 39.13,
2488
- "eval_accuracy": 0.9884615384615385,
2489
- "eval_loss": 0.12455728650093079,
2490
- "eval_runtime": 1.8064,
2491
- "eval_samples_per_second": 143.932,
2492
- "eval_steps_per_second": 18.268,
2493
- "step": 3600
2494
- },
2495
- {
2496
- "epoch": 39.24,
2497
- "learning_rate": 4.321739130434783e-05,
2498
- "loss": 0.1889,
2499
- "step": 3610
2500
- },
2501
- {
2502
- "epoch": 39.35,
2503
- "learning_rate": 4.2782608695652176e-05,
2504
- "loss": 0.2648,
2505
- "step": 3620
2506
- },
2507
- {
2508
- "epoch": 39.46,
2509
- "learning_rate": 4.2347826086956523e-05,
2510
- "loss": 0.1157,
2511
- "step": 3630
2512
- },
2513
- {
2514
- "epoch": 39.57,
2515
- "learning_rate": 4.191304347826087e-05,
2516
- "loss": 0.2186,
2517
- "step": 3640
2518
- },
2519
- {
2520
- "epoch": 39.67,
2521
- "learning_rate": 4.147826086956522e-05,
2522
- "loss": 0.1046,
2523
- "step": 3650
2524
- },
2525
- {
2526
- "epoch": 39.78,
2527
- "learning_rate": 4.104347826086957e-05,
2528
- "loss": 0.0709,
2529
- "step": 3660
2530
- },
2531
- {
2532
- "epoch": 39.89,
2533
- "learning_rate": 4.0608695652173915e-05,
2534
- "loss": 0.1139,
2535
- "step": 3670
2536
- },
2537
- {
2538
- "epoch": 40.0,
2539
- "learning_rate": 4.017391304347826e-05,
2540
- "loss": 0.114,
2541
- "step": 3680
2542
- },
2543
- {
2544
- "epoch": 40.11,
2545
- "learning_rate": 3.973913043478261e-05,
2546
- "loss": 0.2728,
2547
- "step": 3690
2548
- },
2549
- {
2550
- "epoch": 40.22,
2551
- "learning_rate": 3.930434782608696e-05,
2552
- "loss": 0.1981,
2553
- "step": 3700
2554
- },
2555
- {
2556
- "epoch": 40.22,
2557
- "eval_accuracy": 0.9807692307692307,
2558
- "eval_loss": 0.09199390560388565,
2559
- "eval_runtime": 1.826,
2560
- "eval_samples_per_second": 142.387,
2561
- "eval_steps_per_second": 18.072,
2562
- "step": 3700
2563
- },
2564
- {
2565
- "epoch": 40.33,
2566
- "learning_rate": 3.8869565217391306e-05,
2567
- "loss": 0.2243,
2568
- "step": 3710
2569
- },
2570
- {
2571
- "epoch": 40.43,
2572
- "learning_rate": 3.8434782608695654e-05,
2573
- "loss": 0.1076,
2574
- "step": 3720
2575
- },
2576
- {
2577
- "epoch": 40.54,
2578
- "learning_rate": 3.8e-05,
2579
- "loss": 0.1188,
2580
- "step": 3730
2581
- },
2582
- {
2583
- "epoch": 40.65,
2584
- "learning_rate": 3.756521739130435e-05,
2585
- "loss": 0.1285,
2586
- "step": 3740
2587
- },
2588
- {
2589
- "epoch": 40.76,
2590
- "learning_rate": 3.71304347826087e-05,
2591
- "loss": 0.1592,
2592
- "step": 3750
2593
- },
2594
- {
2595
- "epoch": 40.87,
2596
- "learning_rate": 3.6695652173913046e-05,
2597
- "loss": 0.0863,
2598
- "step": 3760
2599
- },
2600
- {
2601
- "epoch": 40.98,
2602
- "learning_rate": 3.6260869565217394e-05,
2603
- "loss": 0.1847,
2604
- "step": 3770
2605
- },
2606
- {
2607
- "epoch": 41.09,
2608
- "learning_rate": 3.582608695652174e-05,
2609
- "loss": 0.1492,
2610
- "step": 3780
2611
- },
2612
- {
2613
- "epoch": 41.2,
2614
- "learning_rate": 3.539130434782609e-05,
2615
- "loss": 0.2077,
2616
- "step": 3790
2617
- },
2618
- {
2619
- "epoch": 41.3,
2620
- "learning_rate": 3.495652173913044e-05,
2621
- "loss": 0.1421,
2622
- "step": 3800
2623
- },
2624
- {
2625
- "epoch": 41.3,
2626
- "eval_accuracy": 0.9807692307692307,
2627
- "eval_loss": 0.09830256551504135,
2628
- "eval_runtime": 1.8855,
2629
- "eval_samples_per_second": 137.894,
2630
- "eval_steps_per_second": 17.502,
2631
- "step": 3800
2632
- },
2633
- {
2634
- "epoch": 41.41,
2635
- "learning_rate": 3.4521739130434785e-05,
2636
- "loss": 0.1659,
2637
- "step": 3810
2638
- },
2639
- {
2640
- "epoch": 41.52,
2641
- "learning_rate": 3.408695652173913e-05,
2642
- "loss": 0.1637,
2643
- "step": 3820
2644
- },
2645
- {
2646
- "epoch": 41.63,
2647
- "learning_rate": 3.365217391304348e-05,
2648
- "loss": 0.1876,
2649
- "step": 3830
2650
- },
2651
- {
2652
- "epoch": 41.74,
2653
- "learning_rate": 3.321739130434783e-05,
2654
- "loss": 0.2014,
2655
- "step": 3840
2656
- },
2657
- {
2658
- "epoch": 41.85,
2659
- "learning_rate": 3.278260869565217e-05,
2660
- "loss": 0.2616,
2661
- "step": 3850
2662
- },
2663
- {
2664
- "epoch": 41.96,
2665
- "learning_rate": 3.2347826086956524e-05,
2666
- "loss": 0.2388,
2667
- "step": 3860
2668
- },
2669
- {
2670
- "epoch": 42.07,
2671
- "learning_rate": 3.191304347826087e-05,
2672
- "loss": 0.1195,
2673
- "step": 3870
2674
- },
2675
- {
2676
- "epoch": 42.17,
2677
- "learning_rate": 3.147826086956522e-05,
2678
- "loss": 0.0911,
2679
- "step": 3880
2680
- },
2681
- {
2682
- "epoch": 42.28,
2683
- "learning_rate": 3.104347826086957e-05,
2684
- "loss": 0.1069,
2685
- "step": 3890
2686
- },
2687
- {
2688
- "epoch": 42.39,
2689
- "learning_rate": 3.0608695652173916e-05,
2690
- "loss": 0.1936,
2691
- "step": 3900
2692
- },
2693
- {
2694
- "epoch": 42.39,
2695
- "eval_accuracy": 0.9769230769230769,
2696
- "eval_loss": 0.11225084215402603,
2697
- "eval_runtime": 1.8302,
2698
- "eval_samples_per_second": 142.059,
2699
- "eval_steps_per_second": 18.031,
2700
- "step": 3900
2701
- },
2702
- {
2703
- "epoch": 42.5,
2704
- "learning_rate": 3.0173913043478264e-05,
2705
- "loss": 0.1685,
2706
- "step": 3910
2707
- },
2708
- {
2709
- "epoch": 42.61,
2710
- "learning_rate": 2.9739130434782608e-05,
2711
- "loss": 0.0714,
2712
- "step": 3920
2713
- },
2714
- {
2715
- "epoch": 42.72,
2716
- "learning_rate": 2.930434782608696e-05,
2717
- "loss": 0.1757,
2718
- "step": 3930
2719
- },
2720
- {
2721
- "epoch": 42.83,
2722
- "learning_rate": 2.8869565217391304e-05,
2723
- "loss": 0.1768,
2724
- "step": 3940
2725
- },
2726
- {
2727
- "epoch": 42.93,
2728
- "learning_rate": 2.8434782608695655e-05,
2729
- "loss": 0.1389,
2730
- "step": 3950
2731
- },
2732
- {
2733
- "epoch": 43.04,
2734
- "learning_rate": 2.8000000000000003e-05,
2735
- "loss": 0.1236,
2736
- "step": 3960
2737
- },
2738
- {
2739
- "epoch": 43.15,
2740
- "learning_rate": 2.7565217391304347e-05,
2741
- "loss": 0.1475,
2742
- "step": 3970
2743
- },
2744
- {
2745
- "epoch": 43.26,
2746
- "learning_rate": 2.71304347826087e-05,
2747
- "loss": 0.0922,
2748
- "step": 3980
2749
- },
2750
- {
2751
- "epoch": 43.37,
2752
- "learning_rate": 2.6695652173913043e-05,
2753
- "loss": 0.1054,
2754
- "step": 3990
2755
- },
2756
- {
2757
- "epoch": 43.48,
2758
- "learning_rate": 2.6260869565217394e-05,
2759
- "loss": 0.097,
2760
- "step": 4000
2761
- },
2762
- {
2763
- "epoch": 43.48,
2764
- "eval_accuracy": 0.9769230769230769,
2765
- "eval_loss": 0.15064960718154907,
2766
- "eval_runtime": 1.9877,
2767
- "eval_samples_per_second": 130.807,
2768
- "eval_steps_per_second": 16.602,
2769
- "step": 4000
2770
- },
2771
- {
2772
- "epoch": 43.59,
2773
- "learning_rate": 2.582608695652174e-05,
2774
- "loss": 0.1154,
2775
- "step": 4010
2776
- },
2777
- {
2778
- "epoch": 43.7,
2779
- "learning_rate": 2.539130434782609e-05,
2780
- "loss": 0.1719,
2781
- "step": 4020
2782
- },
2783
- {
2784
- "epoch": 43.8,
2785
- "learning_rate": 2.4956521739130438e-05,
2786
- "loss": 0.0627,
2787
- "step": 4030
2788
- },
2789
- {
2790
- "epoch": 43.91,
2791
- "learning_rate": 2.4521739130434786e-05,
2792
- "loss": 0.1385,
2793
- "step": 4040
2794
- },
2795
- {
2796
- "epoch": 44.02,
2797
- "learning_rate": 2.408695652173913e-05,
2798
- "loss": 0.1673,
2799
- "step": 4050
2800
- },
2801
- {
2802
- "epoch": 44.13,
2803
- "learning_rate": 2.3652173913043478e-05,
2804
- "loss": 0.1678,
2805
- "step": 4060
2806
- },
2807
- {
2808
- "epoch": 44.24,
2809
- "learning_rate": 2.3217391304347826e-05,
2810
- "loss": 0.1634,
2811
- "step": 4070
2812
- },
2813
- {
2814
- "epoch": 44.35,
2815
- "learning_rate": 2.2782608695652174e-05,
2816
- "loss": 0.0712,
2817
- "step": 4080
2818
- },
2819
- {
2820
- "epoch": 44.46,
2821
- "learning_rate": 2.2347826086956522e-05,
2822
- "loss": 0.1366,
2823
- "step": 4090
2824
- },
2825
- {
2826
- "epoch": 44.57,
2827
- "learning_rate": 2.191304347826087e-05,
2828
- "loss": 0.0791,
2829
- "step": 4100
2830
- },
2831
- {
2832
- "epoch": 44.57,
2833
- "eval_accuracy": 0.9769230769230769,
2834
- "eval_loss": 0.14431653916835785,
2835
- "eval_runtime": 1.8767,
2836
- "eval_samples_per_second": 138.543,
2837
- "eval_steps_per_second": 17.584,
2838
- "step": 4100
2839
- },
2840
- {
2841
- "epoch": 44.67,
2842
- "learning_rate": 2.1478260869565218e-05,
2843
- "loss": 0.0557,
2844
- "step": 4110
2845
- },
2846
- {
2847
- "epoch": 44.78,
2848
- "learning_rate": 2.104347826086957e-05,
2849
- "loss": 0.1431,
2850
- "step": 4120
2851
- },
2852
- {
2853
- "epoch": 44.89,
2854
- "learning_rate": 2.0608695652173913e-05,
2855
- "loss": 0.1888,
2856
- "step": 4130
2857
- },
2858
- {
2859
- "epoch": 45.0,
2860
- "learning_rate": 2.017391304347826e-05,
2861
- "loss": 0.0463,
2862
- "step": 4140
2863
- },
2864
- {
2865
- "epoch": 45.11,
2866
- "learning_rate": 1.973913043478261e-05,
2867
- "loss": 0.1608,
2868
- "step": 4150
2869
- },
2870
- {
2871
- "epoch": 45.22,
2872
- "learning_rate": 1.9304347826086957e-05,
2873
- "loss": 0.1379,
2874
- "step": 4160
2875
- },
2876
- {
2877
- "epoch": 45.33,
2878
- "learning_rate": 1.8869565217391305e-05,
2879
- "loss": 0.2226,
2880
- "step": 4170
2881
- },
2882
- {
2883
- "epoch": 45.43,
2884
- "learning_rate": 1.8434782608695653e-05,
2885
- "loss": 0.1258,
2886
- "step": 4180
2887
- },
2888
- {
2889
- "epoch": 45.54,
2890
- "learning_rate": 1.8e-05,
2891
- "loss": 0.1235,
2892
- "step": 4190
2893
- },
2894
- {
2895
- "epoch": 45.65,
2896
- "learning_rate": 1.756521739130435e-05,
2897
- "loss": 0.0935,
2898
- "step": 4200
2899
- },
2900
- {
2901
- "epoch": 45.65,
2902
- "eval_accuracy": 0.9807692307692307,
2903
- "eval_loss": 0.1689443737268448,
2904
- "eval_runtime": 2.3394,
2905
- "eval_samples_per_second": 111.142,
2906
- "eval_steps_per_second": 14.106,
2907
- "step": 4200
2908
- },
2909
- {
2910
- "epoch": 45.76,
2911
- "learning_rate": 1.7130434782608696e-05,
2912
- "loss": 0.0753,
2913
- "step": 4210
2914
- },
2915
- {
2916
- "epoch": 45.87,
2917
- "learning_rate": 1.6695652173913044e-05,
2918
- "loss": 0.1414,
2919
- "step": 4220
2920
- },
2921
- {
2922
- "epoch": 45.98,
2923
- "learning_rate": 1.6260869565217392e-05,
2924
- "loss": 0.131,
2925
- "step": 4230
2926
- },
2927
- {
2928
- "epoch": 46.09,
2929
- "learning_rate": 1.582608695652174e-05,
2930
- "loss": 0.2012,
2931
- "step": 4240
2932
- },
2933
- {
2934
- "epoch": 46.2,
2935
- "learning_rate": 1.5434782608695654e-05,
2936
- "loss": 0.1882,
2937
- "step": 4250
2938
- },
2939
- {
2940
- "epoch": 46.3,
2941
- "learning_rate": 1.5e-05,
2942
- "loss": 0.1026,
2943
- "step": 4260
2944
- },
2945
- {
2946
- "epoch": 46.41,
2947
- "learning_rate": 1.4565217391304348e-05,
2948
- "loss": 0.0967,
2949
- "step": 4270
2950
- },
2951
- {
2952
- "epoch": 46.52,
2953
- "learning_rate": 1.4130434782608694e-05,
2954
- "loss": 0.2006,
2955
- "step": 4280
2956
- },
2957
- {
2958
- "epoch": 46.63,
2959
- "learning_rate": 1.3695652173913042e-05,
2960
- "loss": 0.1477,
2961
- "step": 4290
2962
- },
2963
- {
2964
- "epoch": 46.74,
2965
- "learning_rate": 1.3260869565217394e-05,
2966
- "loss": 0.1061,
2967
- "step": 4300
2968
- },
2969
- {
2970
- "epoch": 46.74,
2971
- "eval_accuracy": 0.9807692307692307,
2972
- "eval_loss": 0.13794825971126556,
2973
- "eval_runtime": 1.8376,
2974
- "eval_samples_per_second": 141.486,
2975
- "eval_steps_per_second": 17.958,
2976
- "step": 4300
2977
- },
2978
- {
2979
- "epoch": 46.85,
2980
- "learning_rate": 1.2826086956521741e-05,
2981
- "loss": 0.1748,
2982
- "step": 4310
2983
- },
2984
- {
2985
- "epoch": 46.96,
2986
- "learning_rate": 1.2391304347826088e-05,
2987
- "loss": 0.0196,
2988
- "step": 4320
2989
- },
2990
- {
2991
- "epoch": 47.07,
2992
- "learning_rate": 1.1956521739130435e-05,
2993
- "loss": 0.076,
2994
- "step": 4330
2995
- },
2996
- {
2997
- "epoch": 47.17,
2998
- "learning_rate": 1.1521739130434783e-05,
2999
- "loss": 0.1635,
3000
- "step": 4340
3001
- },
3002
- {
3003
- "epoch": 47.28,
3004
- "learning_rate": 1.1086956521739131e-05,
3005
- "loss": 0.1393,
3006
- "step": 4350
3007
- },
3008
- {
3009
- "epoch": 47.39,
3010
- "learning_rate": 1.0652173913043479e-05,
3011
- "loss": 0.0914,
3012
- "step": 4360
3013
- },
3014
- {
3015
- "epoch": 47.5,
3016
- "learning_rate": 1.0217391304347827e-05,
3017
- "loss": 0.0768,
3018
- "step": 4370
3019
- },
3020
- {
3021
- "epoch": 47.61,
3022
- "learning_rate": 9.782608695652175e-06,
3023
- "loss": 0.1176,
3024
- "step": 4380
3025
- },
3026
- {
3027
- "epoch": 47.72,
3028
- "learning_rate": 9.347826086956523e-06,
3029
- "loss": 0.0986,
3030
- "step": 4390
3031
- },
3032
- {
3033
- "epoch": 47.83,
3034
- "learning_rate": 8.91304347826087e-06,
3035
- "loss": 0.2041,
3036
- "step": 4400
3037
- },
3038
- {
3039
- "epoch": 47.83,
3040
- "eval_accuracy": 0.9769230769230769,
3041
- "eval_loss": 0.1391335427761078,
3042
- "eval_runtime": 2.4421,
3043
- "eval_samples_per_second": 106.467,
3044
- "eval_steps_per_second": 13.513,
3045
- "step": 4400
3046
- },
3047
- {
3048
- "epoch": 47.93,
3049
- "learning_rate": 8.478260869565217e-06,
3050
- "loss": 0.0854,
3051
- "step": 4410
3052
- },
3053
- {
3054
- "epoch": 48.04,
3055
- "learning_rate": 8.043478260869565e-06,
3056
- "loss": 0.1058,
3057
- "step": 4420
3058
- },
3059
- {
3060
- "epoch": 48.15,
3061
- "learning_rate": 7.608695652173914e-06,
3062
- "loss": 0.1268,
3063
- "step": 4430
3064
- },
3065
- {
3066
- "epoch": 48.26,
3067
- "learning_rate": 7.173913043478261e-06,
3068
- "loss": 0.0727,
3069
- "step": 4440
3070
- },
3071
- {
3072
- "epoch": 48.37,
3073
- "learning_rate": 6.739130434782609e-06,
3074
- "loss": 0.0944,
3075
- "step": 4450
3076
- },
3077
- {
3078
- "epoch": 48.48,
3079
- "learning_rate": 6.304347826086957e-06,
3080
- "loss": 0.1517,
3081
- "step": 4460
3082
- },
3083
- {
3084
- "epoch": 48.59,
3085
- "learning_rate": 5.869565217391305e-06,
3086
- "loss": 0.1887,
3087
- "step": 4470
3088
- },
3089
- {
3090
- "epoch": 48.7,
3091
- "learning_rate": 5.4347826086956525e-06,
3092
- "loss": 0.1824,
3093
- "step": 4480
3094
- },
3095
- {
3096
- "epoch": 48.8,
3097
- "learning_rate": 5e-06,
3098
- "loss": 0.1564,
3099
- "step": 4490
3100
- },
3101
- {
3102
- "epoch": 48.91,
3103
- "learning_rate": 4.565217391304348e-06,
3104
- "loss": 0.0262,
3105
- "step": 4500
3106
- },
3107
- {
3108
- "epoch": 48.91,
3109
- "eval_accuracy": 0.9769230769230769,
3110
- "eval_loss": 0.135969340801239,
3111
- "eval_runtime": 1.8658,
3112
- "eval_samples_per_second": 139.349,
3113
- "eval_steps_per_second": 17.687,
3114
- "step": 4500
3115
- },
3116
- {
3117
- "epoch": 49.02,
3118
- "learning_rate": 4.130434782608695e-06,
3119
- "loss": 0.1187,
3120
- "step": 4510
3121
- },
3122
- {
3123
- "epoch": 49.13,
3124
- "learning_rate": 3.695652173913044e-06,
3125
- "loss": 0.0973,
3126
- "step": 4520
3127
- },
3128
- {
3129
- "epoch": 49.24,
3130
- "learning_rate": 3.2608695652173914e-06,
3131
- "loss": 0.1082,
3132
- "step": 4530
3133
- },
3134
- {
3135
- "epoch": 49.35,
3136
- "learning_rate": 2.8260869565217393e-06,
3137
- "loss": 0.1439,
3138
- "step": 4540
3139
- },
3140
- {
3141
- "epoch": 49.46,
3142
- "learning_rate": 2.391304347826087e-06,
3143
- "loss": 0.1052,
3144
- "step": 4550
3145
- },
3146
- {
3147
- "epoch": 49.57,
3148
- "learning_rate": 1.956521739130435e-06,
3149
- "loss": 0.1465,
3150
- "step": 4560
3151
- },
3152
- {
3153
- "epoch": 49.67,
3154
- "learning_rate": 1.5217391304347827e-06,
3155
- "loss": 0.1092,
3156
- "step": 4570
3157
- },
3158
- {
3159
- "epoch": 49.78,
3160
- "learning_rate": 1.0869565217391306e-06,
3161
- "loss": 0.1454,
3162
- "step": 4580
3163
- },
3164
- {
3165
- "epoch": 49.89,
3166
- "learning_rate": 6.521739130434782e-07,
3167
- "loss": 0.1703,
3168
- "step": 4590
3169
- },
3170
- {
3171
- "epoch": 50.0,
3172
- "learning_rate": 2.173913043478261e-07,
3173
- "loss": 0.1274,
3174
- "step": 4600
3175
- },
3176
- {
3177
- "epoch": 50.0,
3178
- "eval_accuracy": 0.9769230769230769,
3179
- "eval_loss": 0.13699783384799957,
3180
- "eval_runtime": 2.6653,
3181
- "eval_samples_per_second": 97.548,
3182
- "eval_steps_per_second": 12.381,
3183
- "step": 4600
3184
- },
3185
- {
3186
- "epoch": 50.0,
3187
- "step": 4600,
3188
- "total_flos": 1.8256530865176576e+18,
3189
- "train_loss": 0.3526626825786155,
3190
- "train_runtime": 995.2393,
3191
- "train_samples_per_second": 73.751,
3192
- "train_steps_per_second": 4.622
3193
  }
3194
  ],
3195
  "logging_steps": 10,
3196
- "max_steps": 4600,
3197
- "num_train_epochs": 50,
3198
  "save_steps": 100,
3199
- "total_flos": 1.8256530865176576e+18,
3200
  "trial_name": null,
3201
  "trial_params": null
3202
  }
 
1
  {
2
+ "best_metric": 0.9846153846153847,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned_ASL_Isolated_Swin_dataset2/checkpoint-1700",
4
+ "epoch": 20.0,
5
  "eval_steps": 100,
6
+ "global_step": 1840,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.11,
13
+ "learning_rate": 0.00019891304347826087,
14
+ "loss": 3.3365,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.22,
19
+ "learning_rate": 0.00019782608695652175,
20
+ "loss": 3.2188,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.33,
25
+ "learning_rate": 0.00019673913043478263,
26
+ "loss": 3.1756,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.43,
31
+ "learning_rate": 0.0001956521739130435,
32
+ "loss": 2.9023,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.54,
37
+ "learning_rate": 0.00019467391304347825,
38
+ "loss": 2.5517,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.65,
43
+ "learning_rate": 0.00019358695652173916,
44
+ "loss": 2.4131,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.76,
49
+ "learning_rate": 0.00019250000000000002,
50
+ "loss": 2.0957,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.87,
55
+ "learning_rate": 0.00019141304347826087,
56
+ "loss": 1.8856,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.98,
61
+ "learning_rate": 0.00019032608695652176,
62
+ "loss": 1.7217,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 1.09,
67
+ "learning_rate": 0.0001892391304347826,
68
+ "loss": 1.6758,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 1.09,
73
+ "eval_accuracy": 0.5769230769230769,
74
+ "eval_loss": 1.320558786392212,
75
+ "eval_runtime": 2.846,
76
+ "eval_samples_per_second": 91.356,
77
+ "eval_steps_per_second": 11.595,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 1.2,
82
+ "learning_rate": 0.0001881521739130435,
83
+ "loss": 1.4695,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 1.3,
88
+ "learning_rate": 0.00018706521739130435,
89
+ "loss": 1.311,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 1.41,
94
+ "learning_rate": 0.00018597826086956523,
95
+ "loss": 1.2741,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 1.52,
100
+ "learning_rate": 0.0001848913043478261,
101
+ "loss": 1.3444,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 1.63,
106
+ "learning_rate": 0.00018380434782608697,
107
+ "loss": 1.0445,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 1.74,
112
+ "learning_rate": 0.00018282608695652174,
113
+ "loss": 1.0316,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 1.85,
118
+ "learning_rate": 0.00018173913043478262,
119
+ "loss": 1.0877,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 1.96,
124
+ "learning_rate": 0.00018065217391304348,
125
+ "loss": 1.1561,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 2.07,
130
+ "learning_rate": 0.00017956521739130436,
131
+ "loss": 1.0726,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 2.17,
136
+ "learning_rate": 0.00017847826086956522,
137
+ "loss": 1.0717,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 2.17,
142
+ "eval_accuracy": 0.8153846153846154,
143
+ "eval_loss": 0.6482219696044922,
144
+ "eval_runtime": 1.825,
145
+ "eval_samples_per_second": 142.465,
146
+ "eval_steps_per_second": 18.082,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 2.28,
151
+ "learning_rate": 0.0001773913043478261,
152
+ "loss": 0.744,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 2.39,
157
+ "learning_rate": 0.00017630434782608695,
158
+ "loss": 0.9959,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 2.5,
163
+ "learning_rate": 0.00017521739130434784,
164
+ "loss": 0.9342,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 2.61,
169
+ "learning_rate": 0.00017413043478260872,
170
+ "loss": 0.8275,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 2.72,
175
+ "learning_rate": 0.00017304347826086958,
176
+ "loss": 0.8388,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 2.83,
181
+ "learning_rate": 0.00017195652173913043,
182
+ "loss": 0.8996,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 2.93,
187
+ "learning_rate": 0.00017086956521739132,
188
+ "loss": 0.6616,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 3.04,
193
+ "learning_rate": 0.0001697826086956522,
194
+ "loss": 0.7714,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 3.15,
199
+ "learning_rate": 0.00016869565217391306,
200
+ "loss": 0.7296,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 3.26,
205
+ "learning_rate": 0.0001676086956521739,
206
+ "loss": 0.627,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 3.26,
211
+ "eval_accuracy": 0.8653846153846154,
212
+ "eval_loss": 0.4485549330711365,
213
+ "eval_runtime": 2.5537,
214
+ "eval_samples_per_second": 101.814,
215
+ "eval_steps_per_second": 12.923,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 3.37,
220
+ "learning_rate": 0.0001666304347826087,
221
+ "loss": 0.8732,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 3.48,
226
+ "learning_rate": 0.00016554347826086958,
227
+ "loss": 0.8933,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 3.59,
232
+ "learning_rate": 0.00016445652173913044,
233
+ "loss": 0.8468,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 3.7,
238
+ "learning_rate": 0.0001633695652173913,
239
+ "loss": 0.6468,
240
  "step": 340
241
  },
242
  {
243
  "epoch": 3.8,
244
+ "learning_rate": 0.00016228260869565218,
245
+ "loss": 0.7266,
246
  "step": 350
247
  },
248
  {
249
  "epoch": 3.91,
250
+ "learning_rate": 0.00016119565217391306,
251
+ "loss": 0.8522,
252
  "step": 360
253
  },
254
  {
255
  "epoch": 4.02,
256
+ "learning_rate": 0.00016010869565217392,
257
+ "loss": 0.5888,
258
  "step": 370
259
  },
260
  {
261
  "epoch": 4.13,
262
+ "learning_rate": 0.00015902173913043478,
263
+ "loss": 0.685,
264
  "step": 380
265
  },
266
  {
267
  "epoch": 4.24,
268
+ "learning_rate": 0.00015793478260869566,
269
+ "loss": 0.6937,
270
  "step": 390
271
  },
272
  {
273
  "epoch": 4.35,
274
+ "learning_rate": 0.00015684782608695654,
275
+ "loss": 0.5397,
276
  "step": 400
277
  },
278
  {
279
  "epoch": 4.35,
280
+ "eval_accuracy": 0.8923076923076924,
281
+ "eval_loss": 0.2977767884731293,
282
+ "eval_runtime": 1.8479,
283
+ "eval_samples_per_second": 140.701,
284
+ "eval_steps_per_second": 17.858,
285
  "step": 400
286
  },
287
  {
288
  "epoch": 4.46,
289
+ "learning_rate": 0.0001557608695652174,
290
+ "loss": 0.6058,
291
  "step": 410
292
  },
293
  {
294
  "epoch": 4.57,
295
+ "learning_rate": 0.00015467391304347828,
296
+ "loss": 0.553,
297
  "step": 420
298
  },
299
  {
300
  "epoch": 4.67,
301
+ "learning_rate": 0.00015358695652173914,
302
+ "loss": 0.4621,
303
  "step": 430
304
  },
305
  {
306
  "epoch": 4.78,
307
+ "learning_rate": 0.0001525,
308
+ "loss": 0.5002,
309
  "step": 440
310
  },
311
  {
312
  "epoch": 4.89,
313
+ "learning_rate": 0.00015141304347826088,
314
+ "loss": 0.4833,
315
  "step": 450
316
  },
317
  {
318
  "epoch": 5.0,
319
+ "learning_rate": 0.00015032608695652176,
320
+ "loss": 0.5952,
321
  "step": 460
322
  },
323
  {
324
  "epoch": 5.11,
325
+ "learning_rate": 0.00014923913043478262,
326
+ "loss": 0.5049,
327
  "step": 470
328
  },
329
  {
330
  "epoch": 5.22,
331
+ "learning_rate": 0.00014815217391304347,
332
+ "loss": 0.5219,
333
  "step": 480
334
  },
335
  {
336
  "epoch": 5.33,
337
+ "learning_rate": 0.00014706521739130436,
338
+ "loss": 0.6222,
339
  "step": 490
340
  },
341
  {
342
  "epoch": 5.43,
343
+ "learning_rate": 0.00014597826086956524,
344
+ "loss": 0.537,
345
  "step": 500
346
  },
347
  {
348
  "epoch": 5.43,
349
+ "eval_accuracy": 0.9423076923076923,
350
+ "eval_loss": 0.151279479265213,
351
+ "eval_runtime": 1.8783,
352
+ "eval_samples_per_second": 138.42,
353
+ "eval_steps_per_second": 17.569,
354
  "step": 500
355
  },
356
  {
357
  "epoch": 5.54,
358
+ "learning_rate": 0.0001448913043478261,
359
+ "loss": 0.498,
360
  "step": 510
361
  },
362
  {
363
  "epoch": 5.65,
364
+ "learning_rate": 0.00014380434782608695,
365
+ "loss": 0.5532,
366
  "step": 520
367
  },
368
  {
369
  "epoch": 5.76,
370
+ "learning_rate": 0.00014271739130434783,
371
+ "loss": 0.5434,
372
  "step": 530
373
  },
374
  {
375
  "epoch": 5.87,
376
+ "learning_rate": 0.00014163043478260872,
377
+ "loss": 0.6535,
378
  "step": 540
379
  },
380
  {
381
  "epoch": 5.98,
382
+ "learning_rate": 0.00014054347826086957,
383
+ "loss": 0.4965,
384
  "step": 550
385
  },
386
  {
387
  "epoch": 6.09,
388
+ "learning_rate": 0.00013945652173913043,
389
+ "loss": 0.6712,
390
  "step": 560
391
  },
392
  {
393
  "epoch": 6.2,
394
+ "learning_rate": 0.0001383695652173913,
395
+ "loss": 0.5683,
396
  "step": 570
397
  },
398
  {
399
  "epoch": 6.3,
400
+ "learning_rate": 0.00013728260869565217,
401
+ "loss": 0.3377,
402
  "step": 580
403
  },
404
  {
405
  "epoch": 6.41,
406
+ "learning_rate": 0.00013619565217391305,
407
+ "loss": 0.482,
408
  "step": 590
409
  },
410
  {
411
  "epoch": 6.52,
412
+ "learning_rate": 0.00013510869565217394,
413
+ "loss": 0.3766,
414
  "step": 600
415
  },
416
  {
417
  "epoch": 6.52,
418
+ "eval_accuracy": 0.8846153846153846,
419
+ "eval_loss": 0.4736884534358978,
420
+ "eval_runtime": 1.8513,
421
+ "eval_samples_per_second": 140.44,
422
+ "eval_steps_per_second": 17.825,
423
  "step": 600
424
  },
425
  {
426
  "epoch": 6.63,
427
+ "learning_rate": 0.0001340217391304348,
428
+ "loss": 0.5068,
429
  "step": 610
430
  },
431
  {
432
  "epoch": 6.74,
433
+ "learning_rate": 0.00013293478260869565,
434
+ "loss": 0.3019,
435
  "step": 620
436
  },
437
  {
438
  "epoch": 6.85,
439
+ "learning_rate": 0.00013184782608695653,
440
+ "loss": 0.4852,
441
  "step": 630
442
  },
443
  {
444
  "epoch": 6.96,
445
+ "learning_rate": 0.00013076086956521741,
446
+ "loss": 0.55,
447
  "step": 640
448
  },
449
  {
450
  "epoch": 7.07,
451
+ "learning_rate": 0.00012967391304347827,
452
+ "loss": 0.4122,
453
  "step": 650
454
  },
455
  {
456
  "epoch": 7.17,
457
+ "learning_rate": 0.00012858695652173913,
458
+ "loss": 0.3566,
459
  "step": 660
460
  },
461
  {
462
  "epoch": 7.28,
463
+ "learning_rate": 0.0001275,
464
+ "loss": 0.4279,
465
  "step": 670
466
  },
467
  {
468
  "epoch": 7.39,
469
+ "learning_rate": 0.0001264130434782609,
470
+ "loss": 0.3295,
471
  "step": 680
472
  },
473
  {
474
  "epoch": 7.5,
475
+ "learning_rate": 0.00012532608695652175,
476
+ "loss": 0.5113,
477
  "step": 690
478
  },
479
  {
480
  "epoch": 7.61,
481
+ "learning_rate": 0.0001242391304347826,
482
+ "loss": 0.3994,
483
  "step": 700
484
  },
485
  {
486
  "epoch": 7.61,
487
+ "eval_accuracy": 0.9115384615384615,
488
+ "eval_loss": 0.3060305714607239,
489
+ "eval_runtime": 1.8463,
490
+ "eval_samples_per_second": 140.821,
491
+ "eval_steps_per_second": 17.873,
492
  "step": 700
493
  },
494
  {
495
  "epoch": 7.72,
496
+ "learning_rate": 0.0001231521739130435,
497
+ "loss": 0.5572,
498
  "step": 710
499
  },
500
  {
501
  "epoch": 7.83,
502
+ "learning_rate": 0.00012206521739130434,
503
+ "loss": 0.2986,
504
  "step": 720
505
  },
506
  {
507
  "epoch": 7.93,
508
+ "learning_rate": 0.00012097826086956523,
509
+ "loss": 0.5146,
510
  "step": 730
511
  },
512
  {
513
  "epoch": 8.04,
514
+ "learning_rate": 0.0001198913043478261,
515
+ "loss": 0.4029,
516
  "step": 740
517
  },
518
  {
519
  "epoch": 8.15,
520
+ "learning_rate": 0.00011880434782608695,
521
+ "loss": 0.3901,
522
  "step": 750
523
  },
524
  {
525
  "epoch": 8.26,
526
+ "learning_rate": 0.00011771739130434782,
527
+ "loss": 0.5112,
528
  "step": 760
529
  },
530
  {
531
  "epoch": 8.37,
532
+ "learning_rate": 0.0001166304347826087,
533
+ "loss": 0.2704,
534
  "step": 770
535
  },
536
  {
537
  "epoch": 8.48,
538
+ "learning_rate": 0.00011554347826086958,
539
+ "loss": 0.3839,
540
  "step": 780
541
  },
542
  {
543
  "epoch": 8.59,
544
+ "learning_rate": 0.00011445652173913045,
545
+ "loss": 0.4988,
546
  "step": 790
547
  },
548
  {
549
  "epoch": 8.7,
550
+ "learning_rate": 0.0001133695652173913,
551
+ "loss": 0.2139,
552
  "step": 800
553
  },
554
  {
555
  "epoch": 8.7,
556
+ "eval_accuracy": 0.9576923076923077,
557
+ "eval_loss": 0.13448870182037354,
558
+ "eval_runtime": 1.9169,
559
+ "eval_samples_per_second": 135.638,
560
+ "eval_steps_per_second": 17.216,
561
  "step": 800
562
  },
563
  {
564
  "epoch": 8.8,
565
+ "learning_rate": 0.00011228260869565217,
566
+ "loss": 0.2857,
567
  "step": 810
568
  },
569
  {
570
  "epoch": 8.91,
571
+ "learning_rate": 0.00011119565217391305,
572
+ "loss": 0.2416,
573
  "step": 820
574
  },
575
  {
576
  "epoch": 9.02,
577
+ "learning_rate": 0.00011010869565217392,
578
+ "loss": 0.4803,
579
  "step": 830
580
  },
581
  {
582
  "epoch": 9.13,
583
+ "learning_rate": 0.00010902173913043478,
584
+ "loss": 0.4416,
585
  "step": 840
586
  },
587
  {
588
  "epoch": 9.24,
589
+ "learning_rate": 0.00010793478260869565,
590
+ "loss": 0.3271,
591
  "step": 850
592
  },
593
  {
594
  "epoch": 9.35,
595
+ "learning_rate": 0.00010684782608695653,
596
+ "loss": 0.2261,
597
  "step": 860
598
  },
599
  {
600
  "epoch": 9.46,
601
+ "learning_rate": 0.0001057608695652174,
602
+ "loss": 0.2063,
603
  "step": 870
604
  },
605
  {
606
  "epoch": 9.57,
607
+ "learning_rate": 0.00010467391304347827,
608
+ "loss": 0.4865,
609
  "step": 880
610
  },
611
  {
612
  "epoch": 9.67,
613
+ "learning_rate": 0.00010358695652173913,
614
+ "loss": 0.4903,
615
  "step": 890
616
  },
617
  {
618
  "epoch": 9.78,
619
+ "learning_rate": 0.0001025,
620
+ "loss": 0.2995,
621
  "step": 900
622
  },
623
  {
624
  "epoch": 9.78,
625
+ "eval_accuracy": 0.95,
626
+ "eval_loss": 0.155814990401268,
627
+ "eval_runtime": 1.8528,
628
+ "eval_samples_per_second": 140.328,
629
+ "eval_steps_per_second": 17.811,
630
  "step": 900
631
  },
632
  {
633
  "epoch": 9.89,
634
+ "learning_rate": 0.00010141304347826088,
635
+ "loss": 0.2847,
636
  "step": 910
637
  },
638
  {
639
  "epoch": 10.0,
640
+ "learning_rate": 0.00010032608695652175,
641
+ "loss": 0.2913,
642
  "step": 920
643
  },
644
  {
645
  "epoch": 10.11,
646
+ "learning_rate": 9.923913043478261e-05,
647
+ "loss": 0.2779,
648
  "step": 930
649
  },
650
  {
651
  "epoch": 10.22,
652
+ "learning_rate": 9.815217391304349e-05,
653
+ "loss": 0.2205,
654
  "step": 940
655
  },
656
  {
657
  "epoch": 10.33,
658
+ "learning_rate": 9.706521739130435e-05,
659
+ "loss": 0.405,
660
  "step": 950
661
  },
662
  {
663
  "epoch": 10.43,
664
+ "learning_rate": 9.597826086956522e-05,
665
+ "loss": 0.3613,
666
  "step": 960
667
  },
668
  {
669
  "epoch": 10.54,
670
+ "learning_rate": 9.48913043478261e-05,
671
+ "loss": 0.2381,
672
  "step": 970
673
  },
674
  {
675
  "epoch": 10.65,
676
+ "learning_rate": 9.380434782608696e-05,
677
+ "loss": 0.224,
678
  "step": 980
679
  },
680
  {
681
  "epoch": 10.76,
682
+ "learning_rate": 9.271739130434784e-05,
683
+ "loss": 0.2544,
684
  "step": 990
685
  },
686
  {
687
  "epoch": 10.87,
688
+ "learning_rate": 9.16304347826087e-05,
689
+ "loss": 0.2835,
690
  "step": 1000
691
  },
692
  {
693
  "epoch": 10.87,
694
+ "eval_accuracy": 0.9730769230769231,
695
+ "eval_loss": 0.09426813572645187,
696
+ "eval_runtime": 2.2351,
697
+ "eval_samples_per_second": 116.327,
698
+ "eval_steps_per_second": 14.765,
699
  "step": 1000
700
  },
701
  {
702
  "epoch": 10.98,
703
+ "learning_rate": 9.054347826086958e-05,
704
+ "loss": 0.4605,
705
  "step": 1010
706
  },
707
  {
708
  "epoch": 11.09,
709
+ "learning_rate": 8.945652173913043e-05,
710
+ "loss": 0.2259,
711
  "step": 1020
712
  },
713
  {
714
  "epoch": 11.2,
715
+ "learning_rate": 8.83695652173913e-05,
716
+ "loss": 0.2241,
717
  "step": 1030
718
  },
719
  {
720
  "epoch": 11.3,
721
+ "learning_rate": 8.728260869565217e-05,
722
+ "loss": 0.2173,
723
  "step": 1040
724
  },
725
  {
726
  "epoch": 11.41,
727
+ "learning_rate": 8.619565217391304e-05,
728
+ "loss": 0.3629,
729
  "step": 1050
730
  },
731
  {
732
  "epoch": 11.52,
733
+ "learning_rate": 8.510869565217393e-05,
734
+ "loss": 0.6198,
735
  "step": 1060
736
  },
737
  {
738
  "epoch": 11.63,
739
+ "learning_rate": 8.402173913043478e-05,
740
+ "loss": 0.5369,
741
  "step": 1070
742
  },
743
  {
744
  "epoch": 11.74,
745
+ "learning_rate": 8.293478260869567e-05,
746
+ "loss": 0.304,
747
  "step": 1080
748
  },
749
  {
750
  "epoch": 11.85,
751
+ "learning_rate": 8.184782608695652e-05,
752
+ "loss": 0.2756,
753
  "step": 1090
754
  },
755
  {
756
  "epoch": 11.96,
757
+ "learning_rate": 8.076086956521739e-05,
758
+ "loss": 0.3089,
759
  "step": 1100
760
  },
761
  {
762
  "epoch": 11.96,
763
+ "eval_accuracy": 0.9576923076923077,
764
+ "eval_loss": 0.09128668904304504,
765
+ "eval_runtime": 1.8693,
766
+ "eval_samples_per_second": 139.093,
767
+ "eval_steps_per_second": 17.654,
768
  "step": 1100
769
  },
770
  {
771
  "epoch": 12.07,
772
+ "learning_rate": 7.967391304347826e-05,
773
+ "loss": 0.3491,
774
  "step": 1110
775
  },
776
  {
777
  "epoch": 12.17,
778
+ "learning_rate": 7.858695652173913e-05,
779
+ "loss": 0.1677,
780
  "step": 1120
781
  },
782
  {
783
  "epoch": 12.28,
784
+ "learning_rate": 7.75e-05,
785
+ "loss": 0.2795,
786
  "step": 1130
787
  },
788
  {
789
  "epoch": 12.39,
790
+ "learning_rate": 7.641304347826087e-05,
791
+ "loss": 0.2714,
792
  "step": 1140
793
  },
794
  {
795
  "epoch": 12.5,
796
+ "learning_rate": 7.532608695652175e-05,
797
+ "loss": 0.302,
798
  "step": 1150
799
  },
800
  {
801
  "epoch": 12.61,
802
+ "learning_rate": 7.423913043478261e-05,
803
+ "loss": 0.3564,
804
  "step": 1160
805
  },
806
  {
807
  "epoch": 12.72,
808
+ "learning_rate": 7.32608695652174e-05,
809
+ "loss": 0.3085,
810
  "step": 1170
811
  },
812
  {
813
  "epoch": 12.83,
814
+ "learning_rate": 7.217391304347827e-05,
815
+ "loss": 0.2515,
816
  "step": 1180
817
  },
818
  {
819
  "epoch": 12.93,
820
+ "learning_rate": 7.108695652173914e-05,
821
+ "loss": 0.3075,
822
  "step": 1190
823
  },
824
  {
825
  "epoch": 13.04,
826
+ "learning_rate": 7e-05,
827
+ "loss": 0.3632,
828
  "step": 1200
829
  },
830
  {
831
  "epoch": 13.04,
832
+ "eval_accuracy": 0.9692307692307692,
833
+ "eval_loss": 0.08875690400600433,
834
+ "eval_runtime": 2.7533,
835
+ "eval_samples_per_second": 94.431,
836
+ "eval_steps_per_second": 11.986,
837
  "step": 1200
838
  },
839
  {
840
  "epoch": 13.15,
841
+ "learning_rate": 6.891304347826088e-05,
842
+ "loss": 0.3888,
843
  "step": 1210
844
  },
845
  {
846
  "epoch": 13.26,
847
+ "learning_rate": 6.782608695652173e-05,
848
+ "loss": 0.1683,
849
  "step": 1220
850
  },
851
  {
852
  "epoch": 13.37,
853
+ "learning_rate": 6.673913043478262e-05,
854
+ "loss": 0.3672,
855
  "step": 1230
856
  },
857
  {
858
  "epoch": 13.48,
859
+ "learning_rate": 6.565217391304349e-05,
860
+ "loss": 0.335,
861
  "step": 1240
862
  },
863
  {
864
  "epoch": 13.59,
865
+ "learning_rate": 6.456521739130436e-05,
866
+ "loss": 0.1861,
867
  "step": 1250
868
  },
869
  {
870
  "epoch": 13.7,
871
+ "learning_rate": 6.347826086956523e-05,
872
+ "loss": 0.3386,
873
  "step": 1260
874
  },
875
  {
876
  "epoch": 13.8,
877
+ "learning_rate": 6.239130434782608e-05,
878
+ "loss": 0.2222,
879
  "step": 1270
880
  },
881
  {
882
  "epoch": 13.91,
883
+ "learning_rate": 6.130434782608696e-05,
884
+ "loss": 0.1714,
885
  "step": 1280
886
  },
887
  {
888
  "epoch": 14.02,
889
+ "learning_rate": 6.021739130434783e-05,
890
+ "loss": 0.2699,
891
  "step": 1290
892
  },
893
  {
894
  "epoch": 14.13,
895
+ "learning_rate": 5.9130434782608704e-05,
896
+ "loss": 0.327,
897
  "step": 1300
898
  },
899
  {
900
  "epoch": 14.13,
901
+ "eval_accuracy": 0.9807692307692307,
902
+ "eval_loss": 0.10383553802967072,
903
+ "eval_runtime": 1.9077,
904
+ "eval_samples_per_second": 136.293,
905
+ "eval_steps_per_second": 17.299,
906
  "step": 1300
907
  },
908
  {
909
  "epoch": 14.24,
910
+ "learning_rate": 5.804347826086957e-05,
911
+ "loss": 0.224,
912
  "step": 1310
913
  },
914
  {
915
  "epoch": 14.35,
916
+ "learning_rate": 5.695652173913044e-05,
917
+ "loss": 0.378,
918
  "step": 1320
919
  },
920
  {
921
  "epoch": 14.46,
922
+ "learning_rate": 5.5869565217391306e-05,
923
+ "loss": 0.2503,
924
  "step": 1330
925
  },
926
  {
927
  "epoch": 14.57,
928
+ "learning_rate": 5.478260869565217e-05,
929
+ "loss": 0.1633,
930
  "step": 1340
931
  },
932
  {
933
  "epoch": 14.67,
934
+ "learning_rate": 5.3695652173913046e-05,
935
+ "loss": 0.2973,
936
  "step": 1350
937
  },
938
  {
939
  "epoch": 14.78,
940
+ "learning_rate": 5.260869565217391e-05,
941
+ "loss": 0.1887,
942
  "step": 1360
943
  },
944
  {
945
  "epoch": 14.89,
946
+ "learning_rate": 5.1521739130434785e-05,
947
+ "loss": 0.2953,
948
  "step": 1370
949
  },
950
  {
951
  "epoch": 15.0,
952
+ "learning_rate": 5.0434782608695655e-05,
953
+ "loss": 0.248,
954
  "step": 1380
955
  },
956
  {
957
  "epoch": 15.11,
958
+ "learning_rate": 4.9347826086956524e-05,
959
+ "loss": 0.152,
960
  "step": 1390
961
  },
962
  {
963
  "epoch": 15.22,
964
+ "learning_rate": 4.8260869565217394e-05,
965
+ "loss": 0.313,
966
  "step": 1400
967
  },
968
  {
969
  "epoch": 15.22,
970
+ "eval_accuracy": 0.9730769230769231,
971
+ "eval_loss": 0.09758734703063965,
972
+ "eval_runtime": 2.57,
973
+ "eval_samples_per_second": 101.167,
974
+ "eval_steps_per_second": 12.84,
975
  "step": 1400
976
  },
977
  {
978
  "epoch": 15.33,
979
+ "learning_rate": 4.7173913043478264e-05,
980
+ "loss": 0.2301,
981
  "step": 1410
982
  },
983
  {
984
  "epoch": 15.43,
985
+ "learning_rate": 4.608695652173913e-05,
986
+ "loss": 0.1671,
987
  "step": 1420
988
  },
989
  {
990
  "epoch": 15.54,
991
+ "learning_rate": 4.5e-05,
992
+ "loss": 0.2399,
993
  "step": 1430
994
  },
995
  {
996
  "epoch": 15.65,
997
+ "learning_rate": 4.391304347826087e-05,
998
+ "loss": 0.278,
999
  "step": 1440
1000
  },
1001
  {
1002
  "epoch": 15.76,
1003
+ "learning_rate": 4.282608695652174e-05,
1004
+ "loss": 0.3062,
1005
  "step": 1450
1006
  },
1007
  {
1008
  "epoch": 15.87,
1009
+ "learning_rate": 4.1739130434782605e-05,
1010
+ "loss": 0.3704,
1011
  "step": 1460
1012
  },
1013
  {
1014
  "epoch": 15.98,
1015
+ "learning_rate": 4.065217391304348e-05,
1016
+ "loss": 0.231,
1017
  "step": 1470
1018
  },
1019
  {
1020
  "epoch": 16.09,
1021
+ "learning_rate": 3.956521739130435e-05,
1022
+ "loss": 0.2339,
1023
  "step": 1480
1024
  },
1025
  {
1026
  "epoch": 16.2,
1027
+ "learning_rate": 3.847826086956522e-05,
1028
+ "loss": 0.2079,
1029
  "step": 1490
1030
  },
1031
  {
1032
  "epoch": 16.3,
1033
+ "learning_rate": 3.739130434782609e-05,
1034
+ "loss": 0.1752,
1035
  "step": 1500
1036
  },
1037
  {
1038
  "epoch": 16.3,
1039
+ "eval_accuracy": 0.9807692307692307,
1040
+ "eval_loss": 0.050352372229099274,
1041
+ "eval_runtime": 1.9714,
1042
+ "eval_samples_per_second": 131.889,
1043
+ "eval_steps_per_second": 16.74,
1044
  "step": 1500
1045
  },
1046
  {
1047
  "epoch": 16.41,
1048
+ "learning_rate": 3.630434782608696e-05,
1049
+ "loss": 0.1359,
1050
  "step": 1510
1051
  },
1052
  {
1053
  "epoch": 16.52,
1054
+ "learning_rate": 3.521739130434783e-05,
1055
+ "loss": 0.1186,
1056
  "step": 1520
1057
  },
1058
  {
1059
  "epoch": 16.63,
1060
+ "learning_rate": 3.413043478260869e-05,
1061
+ "loss": 0.2223,
1062
  "step": 1530
1063
  },
1064
  {
1065
  "epoch": 16.74,
1066
+ "learning_rate": 3.304347826086956e-05,
1067
+ "loss": 0.3185,
1068
  "step": 1540
1069
  },
1070
  {
1071
  "epoch": 16.85,
1072
+ "learning_rate": 3.195652173913043e-05,
1073
+ "loss": 0.1554,
1074
  "step": 1550
1075
  },
1076
  {
1077
  "epoch": 16.96,
1078
+ "learning_rate": 3.086956521739131e-05,
1079
+ "loss": 0.2177,
1080
  "step": 1560
1081
  },
1082
  {
1083
  "epoch": 17.07,
1084
+ "learning_rate": 2.9782608695652175e-05,
1085
+ "loss": 0.167,
1086
  "step": 1570
1087
  },
1088
  {
1089
  "epoch": 17.17,
1090
+ "learning_rate": 2.8695652173913044e-05,
1091
+ "loss": 0.216,
1092
  "step": 1580
1093
  },
1094
  {
1095
  "epoch": 17.28,
1096
+ "learning_rate": 2.7608695652173917e-05,
1097
+ "loss": 0.2664,
1098
  "step": 1590
1099
  },
1100
  {
1101
  "epoch": 17.39,
1102
+ "learning_rate": 2.6521739130434787e-05,
1103
+ "loss": 0.2397,
1104
  "step": 1600
1105
  },
1106
  {
1107
  "epoch": 17.39,
1108
+ "eval_accuracy": 0.9807692307692307,
1109
+ "eval_loss": 0.06116783991456032,
1110
+ "eval_runtime": 1.9643,
1111
+ "eval_samples_per_second": 132.362,
1112
+ "eval_steps_per_second": 16.8,
1113
  "step": 1600
1114
  },
1115
  {
1116
  "epoch": 17.5,
1117
+ "learning_rate": 2.543478260869565e-05,
1118
+ "loss": 0.1432,
1119
  "step": 1610
1120
  },
1121
  {
1122
  "epoch": 17.61,
1123
+ "learning_rate": 2.4347826086956523e-05,
1124
+ "loss": 0.2176,
1125
  "step": 1620
1126
  },
1127
  {
1128
  "epoch": 17.72,
1129
+ "learning_rate": 2.3260869565217393e-05,
1130
+ "loss": 0.296,
1131
  "step": 1630
1132
  },
1133
  {
1134
  "epoch": 17.83,
1135
+ "learning_rate": 2.2173913043478262e-05,
1136
+ "loss": 0.2308,
1137
  "step": 1640
1138
  },
1139
  {
1140
  "epoch": 17.93,
1141
+ "learning_rate": 2.1086956521739132e-05,
1142
+ "loss": 0.1676,
1143
  "step": 1650
1144
  },
1145
  {
1146
  "epoch": 18.04,
1147
+ "learning_rate": 2e-05,
1148
+ "loss": 0.1274,
1149
  "step": 1660
1150
  },
1151
  {
1152
  "epoch": 18.15,
1153
+ "learning_rate": 1.8913043478260868e-05,
1154
+ "loss": 0.179,
1155
  "step": 1670
1156
  },
1157
  {
1158
  "epoch": 18.26,
1159
+ "learning_rate": 1.782608695652174e-05,
1160
+ "loss": 0.1569,
1161
  "step": 1680
1162
  },
1163
  {
1164
  "epoch": 18.37,
1165
+ "learning_rate": 1.673913043478261e-05,
1166
+ "loss": 0.1897,
1167
  "step": 1690
1168
  },
1169
  {
1170
  "epoch": 18.48,
1171
+ "learning_rate": 1.565217391304348e-05,
1172
+ "loss": 0.1348,
1173
  "step": 1700
1174
  },
1175
  {
1176
  "epoch": 18.48,
1177
+ "eval_accuracy": 0.9846153846153847,
1178
+ "eval_loss": 0.05584708973765373,
1179
+ "eval_runtime": 1.9494,
1180
+ "eval_samples_per_second": 133.373,
1181
+ "eval_steps_per_second": 16.928,
1182
  "step": 1700
1183
  },
1184
  {
1185
  "epoch": 18.59,
1186
+ "learning_rate": 1.4565217391304348e-05,
1187
+ "loss": 0.1662,
1188
  "step": 1710
1189
  },
1190
  {
1191
  "epoch": 18.7,
1192
+ "learning_rate": 1.3478260869565218e-05,
1193
+ "loss": 0.1644,
1194
  "step": 1720
1195
  },
1196
  {
1197
  "epoch": 18.8,
1198
+ "learning_rate": 1.2391304347826088e-05,
1199
+ "loss": 0.2025,
1200
  "step": 1730
1201
  },
1202
  {
1203
  "epoch": 18.91,
1204
+ "learning_rate": 1.1304347826086957e-05,
1205
+ "loss": 0.1745,
1206
  "step": 1740
1207
  },
1208
  {
1209
  "epoch": 19.02,
1210
+ "learning_rate": 1.0217391304347827e-05,
1211
+ "loss": 0.1786,
1212
  "step": 1750
1213
  },
1214
  {
1215
  "epoch": 19.13,
1216
+ "learning_rate": 9.130434782608697e-06,
1217
+ "loss": 0.2791,
1218
  "step": 1760
1219
  },
1220
  {
1221
  "epoch": 19.24,
1222
+ "learning_rate": 8.043478260869565e-06,
1223
+ "loss": 0.2056,
1224
  "step": 1770
1225
  },
1226
  {
1227
  "epoch": 19.35,
1228
+ "learning_rate": 6.956521739130435e-06,
1229
+ "loss": 0.2616,
1230
  "step": 1780
1231
  },
1232
  {
1233
  "epoch": 19.46,
1234
+ "learning_rate": 5.869565217391305e-06,
1235
+ "loss": 0.1488,
1236
  "step": 1790
1237
  },
1238
  {
1239
  "epoch": 19.57,
1240
+ "learning_rate": 4.782608695652174e-06,
1241
+ "loss": 0.2842,
1242
  "step": 1800
1243
  },
1244
  {
1245
  "epoch": 19.57,
1246
+ "eval_accuracy": 0.9769230769230769,
1247
+ "eval_loss": 0.05035410821437836,
1248
+ "eval_runtime": 1.9009,
1249
+ "eval_samples_per_second": 136.776,
1250
+ "eval_steps_per_second": 17.36,
1251
  "step": 1800
1252
  },
1253
  {
1254
  "epoch": 19.67,
1255
+ "learning_rate": 3.695652173913044e-06,
1256
+ "loss": 0.1487,
1257
  "step": 1810
1258
  },
1259
  {
1260
  "epoch": 19.78,
1261
+ "learning_rate": 2.608695652173913e-06,
1262
+ "loss": 0.1818,
1263
  "step": 1820
1264
  },
1265
  {
1266
  "epoch": 19.89,
1267
+ "learning_rate": 1.5217391304347827e-06,
1268
+ "loss": 0.1469,
1269
  "step": 1830
1270
  },
1271
  {
1272
  "epoch": 20.0,
1273
+ "learning_rate": 4.347826086956522e-07,
1274
+ "loss": 0.213,
1275
  "step": 1840
1276
  },
1277
  {
1278
+ "epoch": 20.0,
1279
+ "step": 1840,
1280
+ "total_flos": 7.30261234607063e+17,
1281
+ "train_loss": 0.5342570722103119,
1282
+ "train_runtime": 409.3983,
1283
+ "train_samples_per_second": 71.715,
1284
+ "train_steps_per_second": 4.494
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1285
  }
1286
  ],
1287
  "logging_steps": 10,
1288
+ "max_steps": 1840,
1289
+ "num_train_epochs": 20,
1290
  "save_steps": 100,
1291
+ "total_flos": 7.30261234607063e+17,
1292
  "trial_name": null,
1293
  "trial_params": null
1294
  }