youngp5 commited on
Commit
5eb61aa
1 Parent(s): 8080ec9

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +98 -461
  6. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48e02db927d5123e3ae1c830db167827df07a85226a2c7faf31e17af0a0366b8
3
  size 686525061
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4373ca3f29c182403e6a4581c43a863b8b58292755d198fe1fc834ccd084a6ce
3
  size 686525061
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a51a5a533fd7542d18145c6853de93f25ee3d709d53ec729f2684e7565340f3d
3
  size 343271789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de22ee000a33cc4c27096c9cfabd9c972fb27bbd412e0581dbdd28cd7229ca79
3
  size 343271789
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab62043be50b93d4eb28964be2d945176db3d64fe73ddd052a7656ba9141c683
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dd3a816ab8628e6038ecf426e93a907752049203fbc39b63fcde557182a866f
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7be74ea536e79eb53304c8ad7665b1808b95e8a71eefc3a101477593346ec95
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b081c596a28e61a0c2c71b36c558646038f30339afa5e1f02c6c82300b54ce5e
3
  size 627
trainer_state.json CHANGED
@@ -1,610 +1,247 @@
1
  {
2
- "best_metric": 0.03899691626429558,
3
- "best_model_checkpoint": "./vit-base-beans/checkpoint-720",
4
- "epoch": 1.9565217391304348,
5
- "global_step": 720,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.03,
12
- "learning_rate": 0.0001972826086956522,
13
- "loss": 0.5574,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.05,
18
- "learning_rate": 0.00019456521739130434,
19
- "loss": 0.3197,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.08,
24
- "learning_rate": 0.00019184782608695653,
25
- "loss": 0.271,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.11,
30
- "learning_rate": 0.0001891304347826087,
31
- "loss": 0.2484,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.11,
36
- "eval_accuracy": 0.8801089918256131,
37
- "eval_loss": 0.21790249645709991,
38
- "eval_runtime": 16.4161,
39
- "eval_samples_per_second": 44.712,
40
- "eval_steps_per_second": 5.604,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 0.14,
45
- "learning_rate": 0.0001864130434782609,
46
- "loss": 0.2372,
47
  "step": 50
48
  },
49
  {
50
  "epoch": 0.16,
51
- "learning_rate": 0.00018369565217391304,
52
- "loss": 0.1945,
53
  "step": 60
54
  },
55
  {
56
  "epoch": 0.19,
57
- "learning_rate": 0.00018097826086956522,
58
- "loss": 0.2186,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.22,
63
- "learning_rate": 0.0001782608695652174,
64
- "loss": 0.3033,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.22,
69
- "eval_accuracy": 0.8678474114441417,
70
- "eval_loss": 0.28713682293891907,
71
- "eval_runtime": 18.6159,
72
- "eval_samples_per_second": 39.429,
73
- "eval_steps_per_second": 4.942,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 0.24,
78
- "learning_rate": 0.00017554347826086956,
79
- "loss": 0.2893,
80
  "step": 90
81
  },
82
  {
83
  "epoch": 0.27,
84
- "learning_rate": 0.00017282608695652174,
85
- "loss": 0.3135,
86
  "step": 100
87
  },
88
  {
89
  "epoch": 0.3,
90
- "learning_rate": 0.00017010869565217392,
91
- "loss": 0.2673,
92
  "step": 110
93
  },
94
  {
95
  "epoch": 0.33,
96
- "learning_rate": 0.0001673913043478261,
97
- "loss": 0.2709,
98
  "step": 120
99
  },
100
  {
101
  "epoch": 0.33,
102
- "eval_accuracy": 0.9073569482288828,
103
- "eval_loss": 0.23233647644519806,
104
- "eval_runtime": 16.7819,
105
- "eval_samples_per_second": 43.738,
106
- "eval_steps_per_second": 5.482,
107
  "step": 120
108
  },
109
  {
110
  "epoch": 0.35,
111
- "learning_rate": 0.00016467391304347828,
112
- "loss": 0.2171,
113
  "step": 130
114
  },
115
  {
116
  "epoch": 0.38,
117
- "learning_rate": 0.00016195652173913046,
118
- "loss": 0.2393,
119
  "step": 140
120
  },
121
  {
122
  "epoch": 0.41,
123
- "learning_rate": 0.00015923913043478262,
124
- "loss": 0.1934,
125
  "step": 150
126
  },
127
  {
128
  "epoch": 0.43,
129
- "learning_rate": 0.0001565217391304348,
130
- "loss": 0.1878,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 0.43,
135
- "eval_accuracy": 0.9318801089918256,
136
- "eval_loss": 0.1636728048324585,
137
- "eval_runtime": 16.6111,
138
- "eval_samples_per_second": 44.187,
139
- "eval_steps_per_second": 5.538,
140
  "step": 160
141
  },
142
  {
143
  "epoch": 0.46,
144
- "learning_rate": 0.00015380434782608698,
145
- "loss": 0.2186,
146
  "step": 170
147
  },
148
  {
149
  "epoch": 0.49,
150
- "learning_rate": 0.00015108695652173913,
151
- "loss": 0.1952,
152
  "step": 180
153
  },
154
  {
155
  "epoch": 0.52,
156
- "learning_rate": 0.0001483695652173913,
157
- "loss": 0.1546,
158
  "step": 190
159
  },
160
  {
161
  "epoch": 0.54,
162
- "learning_rate": 0.0001456521739130435,
163
- "loss": 0.1655,
164
  "step": 200
165
  },
166
  {
167
  "epoch": 0.54,
168
- "eval_accuracy": 0.8678474114441417,
169
- "eval_loss": 0.24226485192775726,
170
- "eval_runtime": 17.096,
171
- "eval_samples_per_second": 42.934,
172
- "eval_steps_per_second": 5.381,
173
  "step": 200
174
  },
175
  {
176
  "epoch": 0.57,
177
- "learning_rate": 0.00014293478260869567,
178
- "loss": 0.1843,
179
  "step": 210
180
  },
181
  {
182
  "epoch": 0.6,
183
- "learning_rate": 0.00014021739130434783,
184
- "loss": 0.12,
185
  "step": 220
186
  },
187
  {
188
  "epoch": 0.62,
189
- "learning_rate": 0.0001375,
190
- "loss": 0.1686,
191
  "step": 230
192
  },
193
  {
194
  "epoch": 0.65,
195
- "learning_rate": 0.0001347826086956522,
196
- "loss": 0.2393,
197
  "step": 240
198
  },
199
  {
200
  "epoch": 0.65,
201
- "eval_accuracy": 0.94141689373297,
202
- "eval_loss": 0.1487017571926117,
203
- "eval_runtime": 16.7397,
204
- "eval_samples_per_second": 43.848,
205
- "eval_steps_per_second": 5.496,
206
  "step": 240
207
  },
208
  {
209
  "epoch": 0.68,
210
- "learning_rate": 0.00013206521739130434,
211
- "loss": 0.2159,
212
  "step": 250
213
  },
214
  {
215
  "epoch": 0.71,
216
- "learning_rate": 0.00012934782608695652,
217
- "loss": 0.1608,
218
  "step": 260
219
  },
220
  {
221
  "epoch": 0.73,
222
- "learning_rate": 0.0001266304347826087,
223
- "loss": 0.1508,
224
  "step": 270
225
  },
226
  {
227
  "epoch": 0.76,
228
- "learning_rate": 0.00012391304347826086,
229
- "loss": 0.1432,
230
  "step": 280
231
  },
232
  {
233
  "epoch": 0.76,
234
- "eval_accuracy": 0.9209809264305178,
235
- "eval_loss": 0.1679653823375702,
236
- "eval_runtime": 19.1578,
237
- "eval_samples_per_second": 38.313,
238
- "eval_steps_per_second": 4.802,
239
- "step": 280
240
- },
241
- {
242
- "epoch": 0.79,
243
- "learning_rate": 0.00012119565217391304,
244
- "loss": 0.1852,
245
- "step": 290
246
- },
247
- {
248
- "epoch": 0.82,
249
- "learning_rate": 0.00011847826086956522,
250
- "loss": 0.1845,
251
- "step": 300
252
- },
253
- {
254
- "epoch": 0.84,
255
- "learning_rate": 0.00011576086956521739,
256
- "loss": 0.1238,
257
- "step": 310
258
- },
259
- {
260
- "epoch": 0.87,
261
- "learning_rate": 0.00011304347826086956,
262
- "loss": 0.0986,
263
- "step": 320
264
- },
265
- {
266
- "epoch": 0.87,
267
- "eval_accuracy": 0.9604904632152589,
268
- "eval_loss": 0.11554770916700363,
269
- "eval_runtime": 16.799,
270
- "eval_samples_per_second": 43.693,
271
- "eval_steps_per_second": 5.477,
272
- "step": 320
273
- },
274
- {
275
- "epoch": 0.9,
276
- "learning_rate": 0.00011032608695652174,
277
- "loss": 0.1781,
278
- "step": 330
279
- },
280
- {
281
- "epoch": 0.92,
282
- "learning_rate": 0.0001076086956521739,
283
- "loss": 0.1229,
284
- "step": 340
285
- },
286
- {
287
- "epoch": 0.95,
288
- "learning_rate": 0.0001048913043478261,
289
- "loss": 0.1238,
290
- "step": 350
291
- },
292
- {
293
- "epoch": 0.98,
294
- "learning_rate": 0.00010217391304347828,
295
- "loss": 0.0729,
296
- "step": 360
297
- },
298
- {
299
- "epoch": 0.98,
300
- "eval_accuracy": 0.9577656675749319,
301
- "eval_loss": 0.10841722786426544,
302
- "eval_runtime": 17.3667,
303
- "eval_samples_per_second": 42.265,
304
- "eval_steps_per_second": 5.297,
305
- "step": 360
306
- },
307
- {
308
- "epoch": 1.01,
309
- "learning_rate": 9.945652173913043e-05,
310
- "loss": 0.1156,
311
- "step": 370
312
- },
313
- {
314
- "epoch": 1.03,
315
- "learning_rate": 9.673913043478261e-05,
316
- "loss": 0.0934,
317
- "step": 380
318
- },
319
- {
320
- "epoch": 1.06,
321
- "learning_rate": 9.402173913043478e-05,
322
- "loss": 0.082,
323
- "step": 390
324
- },
325
- {
326
- "epoch": 1.09,
327
- "learning_rate": 9.130434782608696e-05,
328
- "loss": 0.0553,
329
- "step": 400
330
- },
331
- {
332
- "epoch": 1.09,
333
- "eval_accuracy": 0.9632152588555858,
334
- "eval_loss": 0.09444452077150345,
335
- "eval_runtime": 16.7805,
336
- "eval_samples_per_second": 43.741,
337
- "eval_steps_per_second": 5.483,
338
- "step": 400
339
- },
340
- {
341
- "epoch": 1.11,
342
- "learning_rate": 8.858695652173914e-05,
343
- "loss": 0.1067,
344
- "step": 410
345
- },
346
- {
347
- "epoch": 1.14,
348
- "learning_rate": 8.586956521739131e-05,
349
- "loss": 0.0544,
350
- "step": 420
351
- },
352
- {
353
- "epoch": 1.17,
354
- "learning_rate": 8.315217391304349e-05,
355
- "loss": 0.1047,
356
- "step": 430
357
- },
358
- {
359
- "epoch": 1.2,
360
- "learning_rate": 8.043478260869566e-05,
361
- "loss": 0.0395,
362
- "step": 440
363
- },
364
- {
365
- "epoch": 1.2,
366
- "eval_accuracy": 0.9673024523160763,
367
- "eval_loss": 0.07385484129190445,
368
- "eval_runtime": 17.3745,
369
- "eval_samples_per_second": 42.246,
370
- "eval_steps_per_second": 5.295,
371
- "step": 440
372
- },
373
- {
374
- "epoch": 1.22,
375
- "learning_rate": 7.771739130434783e-05,
376
- "loss": 0.0566,
377
- "step": 450
378
- },
379
- {
380
- "epoch": 1.25,
381
- "learning_rate": 7.500000000000001e-05,
382
- "loss": 0.0625,
383
- "step": 460
384
- },
385
- {
386
- "epoch": 1.28,
387
- "learning_rate": 7.228260869565217e-05,
388
- "loss": 0.0961,
389
- "step": 470
390
- },
391
- {
392
- "epoch": 1.3,
393
- "learning_rate": 6.956521739130436e-05,
394
- "loss": 0.0178,
395
- "step": 480
396
- },
397
- {
398
- "epoch": 1.3,
399
- "eval_accuracy": 0.9673024523160763,
400
- "eval_loss": 0.10838904976844788,
401
- "eval_runtime": 16.8091,
402
- "eval_samples_per_second": 43.667,
403
- "eval_steps_per_second": 5.473,
404
- "step": 480
405
- },
406
- {
407
- "epoch": 1.33,
408
- "learning_rate": 6.684782608695652e-05,
409
- "loss": 0.0641,
410
- "step": 490
411
- },
412
- {
413
- "epoch": 1.36,
414
- "learning_rate": 6.413043478260869e-05,
415
- "loss": 0.0497,
416
- "step": 500
417
- },
418
- {
419
- "epoch": 1.39,
420
- "learning_rate": 6.141304347826087e-05,
421
- "loss": 0.0364,
422
- "step": 510
423
- },
424
- {
425
- "epoch": 1.41,
426
- "learning_rate": 5.869565217391305e-05,
427
- "loss": 0.0273,
428
- "step": 520
429
- },
430
- {
431
- "epoch": 1.41,
432
- "eval_accuracy": 0.9727520435967303,
433
- "eval_loss": 0.07216904312372208,
434
- "eval_runtime": 17.0234,
435
- "eval_samples_per_second": 43.117,
436
- "eval_steps_per_second": 5.404,
437
- "step": 520
438
- },
439
- {
440
- "epoch": 1.44,
441
- "learning_rate": 5.5978260869565226e-05,
442
- "loss": 0.0558,
443
- "step": 530
444
- },
445
- {
446
- "epoch": 1.47,
447
- "learning_rate": 5.32608695652174e-05,
448
- "loss": 0.0379,
449
- "step": 540
450
- },
451
- {
452
- "epoch": 1.49,
453
- "learning_rate": 5.054347826086957e-05,
454
- "loss": 0.0569,
455
- "step": 550
456
- },
457
- {
458
- "epoch": 1.52,
459
- "learning_rate": 4.782608695652174e-05,
460
- "loss": 0.0273,
461
- "step": 560
462
- },
463
- {
464
- "epoch": 1.52,
465
  "eval_accuracy": 0.9754768392370572,
466
- "eval_loss": 0.0689365416765213,
467
- "eval_runtime": 16.7035,
468
- "eval_samples_per_second": 43.943,
469
- "eval_steps_per_second": 5.508,
470
- "step": 560
471
- },
472
- {
473
- "epoch": 1.55,
474
- "learning_rate": 4.5108695652173916e-05,
475
- "loss": 0.0087,
476
- "step": 570
477
- },
478
- {
479
- "epoch": 1.58,
480
- "learning_rate": 4.239130434782609e-05,
481
- "loss": 0.0347,
482
- "step": 580
483
- },
484
- {
485
- "epoch": 1.6,
486
- "learning_rate": 3.9673913043478264e-05,
487
- "loss": 0.0196,
488
- "step": 590
489
- },
490
- {
491
- "epoch": 1.63,
492
- "learning_rate": 3.695652173913043e-05,
493
- "loss": 0.0271,
494
- "step": 600
495
- },
496
- {
497
- "epoch": 1.63,
498
- "eval_accuracy": 0.9795640326975477,
499
- "eval_loss": 0.07317620515823364,
500
- "eval_runtime": 16.8063,
501
- "eval_samples_per_second": 43.674,
502
- "eval_steps_per_second": 5.474,
503
- "step": 600
504
- },
505
- {
506
- "epoch": 1.66,
507
- "learning_rate": 3.423913043478261e-05,
508
- "loss": 0.0951,
509
- "step": 610
510
- },
511
- {
512
- "epoch": 1.68,
513
- "learning_rate": 3.152173913043479e-05,
514
- "loss": 0.0726,
515
- "step": 620
516
- },
517
- {
518
- "epoch": 1.71,
519
- "learning_rate": 2.8804347826086957e-05,
520
- "loss": 0.01,
521
- "step": 630
522
- },
523
- {
524
- "epoch": 1.74,
525
- "learning_rate": 2.608695652173913e-05,
526
- "loss": 0.0294,
527
- "step": 640
528
- },
529
- {
530
- "epoch": 1.74,
531
- "eval_accuracy": 0.9822888283378747,
532
- "eval_loss": 0.057586103677749634,
533
- "eval_runtime": 17.373,
534
- "eval_samples_per_second": 42.249,
535
- "eval_steps_per_second": 5.296,
536
- "step": 640
537
- },
538
- {
539
- "epoch": 1.77,
540
- "learning_rate": 2.3369565217391306e-05,
541
- "loss": 0.017,
542
- "step": 650
543
- },
544
- {
545
- "epoch": 1.79,
546
- "learning_rate": 2.065217391304348e-05,
547
- "loss": 0.0271,
548
- "step": 660
549
- },
550
- {
551
- "epoch": 1.82,
552
- "learning_rate": 1.793478260869565e-05,
553
- "loss": 0.0572,
554
- "step": 670
555
- },
556
- {
557
- "epoch": 1.85,
558
- "learning_rate": 1.5217391304347828e-05,
559
- "loss": 0.0449,
560
- "step": 680
561
- },
562
- {
563
- "epoch": 1.85,
564
- "eval_accuracy": 0.9877384196185286,
565
- "eval_loss": 0.04121558368206024,
566
- "eval_runtime": 16.5738,
567
- "eval_samples_per_second": 44.287,
568
- "eval_steps_per_second": 5.551,
569
- "step": 680
570
- },
571
- {
572
- "epoch": 1.88,
573
- "learning_rate": 1.25e-05,
574
- "loss": 0.0261,
575
- "step": 690
576
- },
577
- {
578
- "epoch": 1.9,
579
- "learning_rate": 9.782608695652175e-06,
580
- "loss": 0.0091,
581
- "step": 700
582
- },
583
- {
584
- "epoch": 1.93,
585
- "learning_rate": 7.065217391304347e-06,
586
- "loss": 0.0799,
587
- "step": 710
588
- },
589
- {
590
- "epoch": 1.96,
591
- "learning_rate": 4.347826086956522e-06,
592
- "loss": 0.0076,
593
- "step": 720
594
- },
595
- {
596
- "epoch": 1.96,
597
- "eval_accuracy": 0.9877384196185286,
598
- "eval_loss": 0.03899691626429558,
599
- "eval_runtime": 17.1545,
600
- "eval_samples_per_second": 42.788,
601
- "eval_steps_per_second": 5.363,
602
- "step": 720
603
  }
604
  ],
605
- "max_steps": 736,
606
- "num_train_epochs": 2,
607
- "total_flos": 8.915533311547699e+17,
608
  "trial_name": null,
609
  "trial_params": null
610
  }
 
1
  {
2
+ "best_metric": 0.07841455936431885,
3
+ "best_model_checkpoint": "./vit-base-beans/checkpoint-280",
4
+ "epoch": 0.7608695652173914,
5
+ "global_step": 280,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.03,
12
+ "learning_rate": 0.00019322033898305085,
13
+ "loss": 0.6554,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.05,
18
+ "learning_rate": 0.0001864406779661017,
19
+ "loss": 0.306,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.08,
24
+ "learning_rate": 0.00017966101694915257,
25
+ "loss": 0.267,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.11,
30
+ "learning_rate": 0.00017288135593220342,
31
+ "loss": 0.2825,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.11,
36
+ "eval_accuracy": 0.9318801089918256,
37
+ "eval_loss": 0.2294050008058548,
38
+ "eval_runtime": 16.8062,
39
+ "eval_samples_per_second": 43.674,
40
+ "eval_steps_per_second": 5.474,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 0.14,
45
+ "learning_rate": 0.00016610169491525423,
46
+ "loss": 0.3097,
47
  "step": 50
48
  },
49
  {
50
  "epoch": 0.16,
51
+ "learning_rate": 0.00015932203389830508,
52
+ "loss": 0.2772,
53
  "step": 60
54
  },
55
  {
56
  "epoch": 0.19,
57
+ "learning_rate": 0.00015254237288135592,
58
+ "loss": 0.1711,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.22,
63
+ "learning_rate": 0.00014576271186440677,
64
+ "loss": 0.1483,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.22,
69
+ "eval_accuracy": 0.944141689373297,
70
+ "eval_loss": 0.16020944714546204,
71
+ "eval_runtime": 17.3798,
72
+ "eval_samples_per_second": 42.233,
73
+ "eval_steps_per_second": 5.294,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 0.24,
78
+ "learning_rate": 0.00013898305084745764,
79
+ "loss": 0.2356,
80
  "step": 90
81
  },
82
  {
83
  "epoch": 0.27,
84
+ "learning_rate": 0.00013220338983050849,
85
+ "loss": 0.1144,
86
  "step": 100
87
  },
88
  {
89
  "epoch": 0.3,
90
+ "learning_rate": 0.00012542372881355933,
91
+ "loss": 0.2216,
92
  "step": 110
93
  },
94
  {
95
  "epoch": 0.33,
96
+ "learning_rate": 0.00011864406779661017,
97
+ "loss": 0.2209,
98
  "step": 120
99
  },
100
  {
101
  "epoch": 0.33,
102
+ "eval_accuracy": 0.829700272479564,
103
+ "eval_loss": 0.29767370223999023,
104
+ "eval_runtime": 17.13,
105
+ "eval_samples_per_second": 42.849,
106
+ "eval_steps_per_second": 5.371,
107
  "step": 120
108
  },
109
  {
110
  "epoch": 0.35,
111
+ "learning_rate": 0.00011186440677966102,
112
+ "loss": 0.2153,
113
  "step": 130
114
  },
115
  {
116
  "epoch": 0.38,
117
+ "learning_rate": 0.00010508474576271188,
118
+ "loss": 0.1693,
119
  "step": 140
120
  },
121
  {
122
  "epoch": 0.41,
123
+ "learning_rate": 9.830508474576272e-05,
124
+ "loss": 0.1625,
125
  "step": 150
126
  },
127
  {
128
  "epoch": 0.43,
129
+ "learning_rate": 9.152542372881357e-05,
130
+ "loss": 0.1384,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 0.43,
135
+ "eval_accuracy": 0.9645776566757494,
136
+ "eval_loss": 0.10578873753547668,
137
+ "eval_runtime": 16.6083,
138
+ "eval_samples_per_second": 44.195,
139
+ "eval_steps_per_second": 5.539,
140
  "step": 160
141
  },
142
  {
143
  "epoch": 0.46,
144
+ "learning_rate": 8.474576271186441e-05,
145
+ "loss": 0.1371,
146
  "step": 170
147
  },
148
  {
149
  "epoch": 0.49,
150
+ "learning_rate": 7.796610169491526e-05,
151
+ "loss": 0.1355,
152
  "step": 180
153
  },
154
  {
155
  "epoch": 0.52,
156
+ "learning_rate": 7.11864406779661e-05,
157
+ "loss": 0.1557,
158
  "step": 190
159
  },
160
  {
161
  "epoch": 0.54,
162
+ "learning_rate": 6.440677966101695e-05,
163
+ "loss": 0.1551,
164
  "step": 200
165
  },
166
  {
167
  "epoch": 0.54,
168
+ "eval_accuracy": 0.9114441416893733,
169
+ "eval_loss": 0.1725204885005951,
170
+ "eval_runtime": 17.0671,
171
+ "eval_samples_per_second": 43.007,
172
+ "eval_steps_per_second": 5.39,
173
  "step": 200
174
  },
175
  {
176
  "epoch": 0.57,
177
+ "learning_rate": 5.76271186440678e-05,
178
+ "loss": 0.2091,
179
  "step": 210
180
  },
181
  {
182
  "epoch": 0.6,
183
+ "learning_rate": 5.0847457627118643e-05,
184
+ "loss": 0.1313,
185
  "step": 220
186
  },
187
  {
188
  "epoch": 0.62,
189
+ "learning_rate": 4.4067796610169495e-05,
190
+ "loss": 0.1056,
191
  "step": 230
192
  },
193
  {
194
  "epoch": 0.65,
195
+ "learning_rate": 3.728813559322034e-05,
196
+ "loss": 0.1608,
197
  "step": 240
198
  },
199
  {
200
  "epoch": 0.65,
201
+ "eval_accuracy": 0.9673024523160763,
202
+ "eval_loss": 0.09211871027946472,
203
+ "eval_runtime": 16.6063,
204
+ "eval_samples_per_second": 44.2,
205
+ "eval_steps_per_second": 5.54,
206
  "step": 240
207
  },
208
  {
209
  "epoch": 0.68,
210
+ "learning_rate": 3.050847457627119e-05,
211
+ "loss": 0.0761,
212
  "step": 250
213
  },
214
  {
215
  "epoch": 0.71,
216
+ "learning_rate": 2.3728813559322036e-05,
217
+ "loss": 0.1044,
218
  "step": 260
219
  },
220
  {
221
  "epoch": 0.73,
222
+ "learning_rate": 1.694915254237288e-05,
223
+ "loss": 0.1141,
224
  "step": 270
225
  },
226
  {
227
  "epoch": 0.76,
228
+ "learning_rate": 1.016949152542373e-05,
229
+ "loss": 0.12,
230
  "step": 280
231
  },
232
  {
233
  "epoch": 0.76,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  "eval_accuracy": 0.9754768392370572,
235
+ "eval_loss": 0.07841455936431885,
236
+ "eval_runtime": 16.7459,
237
+ "eval_samples_per_second": 43.832,
238
+ "eval_steps_per_second": 5.494,
239
+ "step": 280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  }
241
  ],
242
+ "max_steps": 295,
243
+ "num_train_epochs": 1,
244
+ "total_flos": 3.471672249955123e+17,
245
  "trial_name": null,
246
  "trial_params": null
247
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29d42765cdc82ba9c501fd08fe3051fcacc5260575abfef3749cf9ec8c08e6be
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f07575c36fbac5b1ce76fcb42fdadefe90c95ce5be20b51ab48ba4f71f7e83f5
3
  size 3963