Upload folder using huggingface_hub

#5
by youngp5 - opened
Files changed (4) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. trainer_state.json +162 -162
  4. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5167b230451a751d190361a5f1abe4137481252f49d580e8af0df9e5b4e6771
3
  size 686525061
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48e02db927d5123e3ae1c830db167827df07a85226a2c7faf31e17af0a0366b8
3
  size 686525061
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bdb68787afb42af5a9e0105b9694fc8eea4e4778940b6791e5c2e0e043994b3
3
  size 343271789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a51a5a533fd7542d18145c6853de93f25ee3d709d53ec729f2684e7565340f3d
3
  size 343271789
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.057097259908914566,
3
- "best_model_checkpoint": "./vit-base-beans/checkpoint-480",
4
  "epoch": 1.9565217391304348,
5
  "global_step": 720,
6
  "is_hyper_param_search": false,
@@ -10,595 +10,595 @@
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 0.0001972826086956522,
13
- "loss": 0.6016,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.05,
18
  "learning_rate": 0.00019456521739130434,
19
- "loss": 0.168,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.08,
24
  "learning_rate": 0.00019184782608695653,
25
- "loss": 0.1568,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.11,
30
  "learning_rate": 0.0001891304347826087,
31
- "loss": 0.1165,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.11,
36
- "eval_accuracy": 0.9754768392370572,
37
- "eval_loss": 0.10377205908298492,
38
- "eval_runtime": 18.7416,
39
- "eval_samples_per_second": 39.164,
40
- "eval_steps_per_second": 4.909,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 0.14,
45
  "learning_rate": 0.0001864130434782609,
46
- "loss": 0.0853,
47
  "step": 50
48
  },
49
  {
50
  "epoch": 0.16,
51
  "learning_rate": 0.00018369565217391304,
52
- "loss": 0.179,
53
  "step": 60
54
  },
55
  {
56
  "epoch": 0.19,
57
  "learning_rate": 0.00018097826086956522,
58
- "loss": 0.0871,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.22,
63
  "learning_rate": 0.0001782608695652174,
64
- "loss": 0.0596,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.22,
69
- "eval_accuracy": 0.9673024523160763,
70
- "eval_loss": 0.1255735605955124,
71
- "eval_runtime": 19.1765,
72
- "eval_samples_per_second": 38.276,
73
- "eval_steps_per_second": 4.798,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 0.24,
78
  "learning_rate": 0.00017554347826086956,
79
- "loss": 0.1279,
80
  "step": 90
81
  },
82
  {
83
  "epoch": 0.27,
84
  "learning_rate": 0.00017282608695652174,
85
- "loss": 0.0814,
86
  "step": 100
87
  },
88
  {
89
  "epoch": 0.3,
90
  "learning_rate": 0.00017010869565217392,
91
- "loss": 0.0967,
92
  "step": 110
93
  },
94
  {
95
  "epoch": 0.33,
96
  "learning_rate": 0.0001673913043478261,
97
- "loss": 0.1317,
98
  "step": 120
99
  },
100
  {
101
  "epoch": 0.33,
102
- "eval_accuracy": 0.9495912806539509,
103
- "eval_loss": 0.18217410147190094,
104
- "eval_runtime": 17.5926,
105
- "eval_samples_per_second": 41.722,
106
- "eval_steps_per_second": 5.229,
107
  "step": 120
108
  },
109
  {
110
  "epoch": 0.35,
111
  "learning_rate": 0.00016467391304347828,
112
- "loss": 0.0585,
113
  "step": 130
114
  },
115
  {
116
  "epoch": 0.38,
117
  "learning_rate": 0.00016195652173913046,
118
- "loss": 0.078,
119
  "step": 140
120
  },
121
  {
122
  "epoch": 0.41,
123
  "learning_rate": 0.00015923913043478262,
124
- "loss": 0.0794,
125
  "step": 150
126
  },
127
  {
128
  "epoch": 0.43,
129
  "learning_rate": 0.0001565217391304348,
130
- "loss": 0.0731,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 0.43,
135
- "eval_accuracy": 0.9741144414168937,
136
- "eval_loss": 0.11143568158149719,
137
- "eval_runtime": 18.204,
138
- "eval_samples_per_second": 40.321,
139
- "eval_steps_per_second": 5.054,
140
  "step": 160
141
  },
142
  {
143
  "epoch": 0.46,
144
  "learning_rate": 0.00015380434782608698,
145
- "loss": 0.0842,
146
  "step": 170
147
  },
148
  {
149
  "epoch": 0.49,
150
  "learning_rate": 0.00015108695652173913,
151
- "loss": 0.0406,
152
  "step": 180
153
  },
154
  {
155
  "epoch": 0.52,
156
  "learning_rate": 0.0001483695652173913,
157
- "loss": 0.1171,
158
  "step": 190
159
  },
160
  {
161
  "epoch": 0.54,
162
  "learning_rate": 0.0001456521739130435,
163
- "loss": 0.1041,
164
  "step": 200
165
  },
166
  {
167
  "epoch": 0.54,
168
- "eval_accuracy": 0.9754768392370572,
169
- "eval_loss": 0.08514755964279175,
170
- "eval_runtime": 17.638,
171
- "eval_samples_per_second": 41.615,
172
- "eval_steps_per_second": 5.216,
173
  "step": 200
174
  },
175
  {
176
  "epoch": 0.57,
177
  "learning_rate": 0.00014293478260869567,
178
- "loss": 0.0823,
179
  "step": 210
180
  },
181
  {
182
  "epoch": 0.6,
183
  "learning_rate": 0.00014021739130434783,
184
- "loss": 0.0536,
185
  "step": 220
186
  },
187
  {
188
  "epoch": 0.62,
189
  "learning_rate": 0.0001375,
190
- "loss": 0.1171,
191
  "step": 230
192
  },
193
  {
194
  "epoch": 0.65,
195
  "learning_rate": 0.0001347826086956522,
196
- "loss": 0.1292,
197
  "step": 240
198
  },
199
  {
200
  "epoch": 0.65,
201
- "eval_accuracy": 0.9645776566757494,
202
- "eval_loss": 0.13422255218029022,
203
- "eval_runtime": 17.8577,
204
- "eval_samples_per_second": 41.103,
205
- "eval_steps_per_second": 5.152,
206
  "step": 240
207
  },
208
  {
209
  "epoch": 0.68,
210
  "learning_rate": 0.00013206521739130434,
211
- "loss": 0.1483,
212
  "step": 250
213
  },
214
  {
215
  "epoch": 0.71,
216
  "learning_rate": 0.00012934782608695652,
217
- "loss": 0.1082,
218
  "step": 260
219
  },
220
  {
221
  "epoch": 0.73,
222
  "learning_rate": 0.0001266304347826087,
223
- "loss": 0.1206,
224
  "step": 270
225
  },
226
  {
227
  "epoch": 0.76,
228
  "learning_rate": 0.00012391304347826086,
229
- "loss": 0.1232,
230
  "step": 280
231
  },
232
  {
233
  "epoch": 0.76,
234
- "eval_accuracy": 0.9741144414168937,
235
- "eval_loss": 0.08246026188135147,
236
- "eval_runtime": 17.6614,
237
- "eval_samples_per_second": 41.56,
238
- "eval_steps_per_second": 5.209,
239
  "step": 280
240
  },
241
  {
242
  "epoch": 0.79,
243
  "learning_rate": 0.00012119565217391304,
244
- "loss": 0.0745,
245
  "step": 290
246
  },
247
  {
248
  "epoch": 0.82,
249
  "learning_rate": 0.00011847826086956522,
250
- "loss": 0.1076,
251
  "step": 300
252
  },
253
  {
254
  "epoch": 0.84,
255
  "learning_rate": 0.00011576086956521739,
256
- "loss": 0.0651,
257
  "step": 310
258
  },
259
  {
260
  "epoch": 0.87,
261
  "learning_rate": 0.00011304347826086956,
262
- "loss": 0.0782,
263
  "step": 320
264
  },
265
  {
266
  "epoch": 0.87,
267
- "eval_accuracy": 0.9741144414168937,
268
- "eval_loss": 0.09356382489204407,
269
- "eval_runtime": 17.6794,
270
- "eval_samples_per_second": 41.517,
271
- "eval_steps_per_second": 5.204,
272
  "step": 320
273
  },
274
  {
275
  "epoch": 0.9,
276
  "learning_rate": 0.00011032608695652174,
277
- "loss": 0.115,
278
  "step": 330
279
  },
280
  {
281
  "epoch": 0.92,
282
  "learning_rate": 0.0001076086956521739,
283
- "loss": 0.0635,
284
  "step": 340
285
  },
286
  {
287
  "epoch": 0.95,
288
  "learning_rate": 0.0001048913043478261,
289
- "loss": 0.1061,
290
  "step": 350
291
  },
292
  {
293
  "epoch": 0.98,
294
  "learning_rate": 0.00010217391304347828,
295
- "loss": 0.1171,
296
  "step": 360
297
  },
298
  {
299
  "epoch": 0.98,
300
- "eval_accuracy": 0.9768392370572208,
301
- "eval_loss": 0.06765041500329971,
302
- "eval_runtime": 18.1237,
303
- "eval_samples_per_second": 40.499,
304
- "eval_steps_per_second": 5.076,
305
  "step": 360
306
  },
307
  {
308
  "epoch": 1.01,
309
  "learning_rate": 9.945652173913043e-05,
310
- "loss": 0.086,
311
  "step": 370
312
  },
313
  {
314
  "epoch": 1.03,
315
  "learning_rate": 9.673913043478261e-05,
316
- "loss": 0.0685,
317
  "step": 380
318
  },
319
  {
320
  "epoch": 1.06,
321
  "learning_rate": 9.402173913043478e-05,
322
- "loss": 0.052,
323
  "step": 390
324
  },
325
  {
326
  "epoch": 1.09,
327
  "learning_rate": 9.130434782608696e-05,
328
- "loss": 0.0549,
329
  "step": 400
330
  },
331
  {
332
  "epoch": 1.09,
333
- "eval_accuracy": 0.9727520435967303,
334
- "eval_loss": 0.06371203064918518,
335
- "eval_runtime": 17.6078,
336
- "eval_samples_per_second": 41.686,
337
- "eval_steps_per_second": 5.225,
338
  "step": 400
339
  },
340
  {
341
  "epoch": 1.11,
342
  "learning_rate": 8.858695652173914e-05,
343
- "loss": 0.058,
344
  "step": 410
345
  },
346
  {
347
  "epoch": 1.14,
348
  "learning_rate": 8.586956521739131e-05,
349
- "loss": 0.1215,
350
  "step": 420
351
  },
352
  {
353
  "epoch": 1.17,
354
  "learning_rate": 8.315217391304349e-05,
355
- "loss": 0.1048,
356
  "step": 430
357
  },
358
  {
359
  "epoch": 1.2,
360
  "learning_rate": 8.043478260869566e-05,
361
- "loss": 0.0603,
362
  "step": 440
363
  },
364
  {
365
  "epoch": 1.2,
366
- "eval_accuracy": 0.9768392370572208,
367
- "eval_loss": 0.06182762607932091,
368
- "eval_runtime": 18.5478,
369
- "eval_samples_per_second": 39.574,
370
- "eval_steps_per_second": 4.96,
371
  "step": 440
372
  },
373
  {
374
  "epoch": 1.22,
375
  "learning_rate": 7.771739130434783e-05,
376
- "loss": 0.0648,
377
  "step": 450
378
  },
379
  {
380
  "epoch": 1.25,
381
  "learning_rate": 7.500000000000001e-05,
382
- "loss": 0.0512,
383
  "step": 460
384
  },
385
  {
386
  "epoch": 1.28,
387
  "learning_rate": 7.228260869565217e-05,
388
- "loss": 0.0867,
389
  "step": 470
390
  },
391
  {
392
  "epoch": 1.3,
393
  "learning_rate": 6.956521739130436e-05,
394
- "loss": 0.049,
395
  "step": 480
396
  },
397
  {
398
  "epoch": 1.3,
399
- "eval_accuracy": 0.9768392370572208,
400
- "eval_loss": 0.057097259908914566,
401
- "eval_runtime": 17.4967,
402
- "eval_samples_per_second": 41.951,
403
- "eval_steps_per_second": 5.258,
404
  "step": 480
405
  },
406
  {
407
  "epoch": 1.33,
408
  "learning_rate": 6.684782608695652e-05,
409
- "loss": 0.0389,
410
  "step": 490
411
  },
412
  {
413
  "epoch": 1.36,
414
  "learning_rate": 6.413043478260869e-05,
415
- "loss": 0.087,
416
  "step": 500
417
  },
418
  {
419
  "epoch": 1.39,
420
  "learning_rate": 6.141304347826087e-05,
421
- "loss": 0.0733,
422
  "step": 510
423
  },
424
  {
425
  "epoch": 1.41,
426
  "learning_rate": 5.869565217391305e-05,
427
- "loss": 0.0199,
428
  "step": 520
429
  },
430
  {
431
  "epoch": 1.41,
432
  "eval_accuracy": 0.9727520435967303,
433
- "eval_loss": 0.07243036478757858,
434
- "eval_runtime": 17.9356,
435
- "eval_samples_per_second": 40.924,
436
- "eval_steps_per_second": 5.129,
437
  "step": 520
438
  },
439
  {
440
  "epoch": 1.44,
441
  "learning_rate": 5.5978260869565226e-05,
442
- "loss": 0.0629,
443
  "step": 530
444
  },
445
  {
446
  "epoch": 1.47,
447
  "learning_rate": 5.32608695652174e-05,
448
- "loss": 0.0752,
449
  "step": 540
450
  },
451
  {
452
  "epoch": 1.49,
453
  "learning_rate": 5.054347826086957e-05,
454
- "loss": 0.0894,
455
  "step": 550
456
  },
457
  {
458
  "epoch": 1.52,
459
  "learning_rate": 4.782608695652174e-05,
460
- "loss": 0.0787,
461
  "step": 560
462
  },
463
  {
464
  "epoch": 1.52,
465
  "eval_accuracy": 0.9754768392370572,
466
- "eval_loss": 0.06181642785668373,
467
- "eval_runtime": 17.5051,
468
- "eval_samples_per_second": 41.931,
469
- "eval_steps_per_second": 5.256,
470
  "step": 560
471
  },
472
  {
473
  "epoch": 1.55,
474
  "learning_rate": 4.5108695652173916e-05,
475
- "loss": 0.1088,
476
  "step": 570
477
  },
478
  {
479
  "epoch": 1.58,
480
  "learning_rate": 4.239130434782609e-05,
481
- "loss": 0.0586,
482
  "step": 580
483
  },
484
  {
485
  "epoch": 1.6,
486
  "learning_rate": 3.9673913043478264e-05,
487
- "loss": 0.0972,
488
  "step": 590
489
  },
490
  {
491
  "epoch": 1.63,
492
  "learning_rate": 3.695652173913043e-05,
493
- "loss": 0.049,
494
  "step": 600
495
  },
496
  {
497
  "epoch": 1.63,
498
- "eval_accuracy": 0.9754768392370572,
499
- "eval_loss": 0.058572325855493546,
500
- "eval_runtime": 19.2109,
501
- "eval_samples_per_second": 38.207,
502
- "eval_steps_per_second": 4.789,
503
  "step": 600
504
  },
505
  {
506
  "epoch": 1.66,
507
  "learning_rate": 3.423913043478261e-05,
508
- "loss": 0.0505,
509
  "step": 610
510
  },
511
  {
512
  "epoch": 1.68,
513
  "learning_rate": 3.152173913043479e-05,
514
- "loss": 0.0226,
515
  "step": 620
516
  },
517
  {
518
  "epoch": 1.71,
519
  "learning_rate": 2.8804347826086957e-05,
520
- "loss": 0.0754,
521
  "step": 630
522
  },
523
  {
524
  "epoch": 1.74,
525
  "learning_rate": 2.608695652173913e-05,
526
- "loss": 0.0356,
527
  "step": 640
528
  },
529
  {
530
  "epoch": 1.74,
531
- "eval_accuracy": 0.9754768392370572,
532
- "eval_loss": 0.05890597403049469,
533
- "eval_runtime": 17.4571,
534
- "eval_samples_per_second": 42.046,
535
- "eval_steps_per_second": 5.27,
536
  "step": 640
537
  },
538
  {
539
  "epoch": 1.77,
540
  "learning_rate": 2.3369565217391306e-05,
541
- "loss": 0.0261,
542
  "step": 650
543
  },
544
  {
545
  "epoch": 1.79,
546
  "learning_rate": 2.065217391304348e-05,
547
- "loss": 0.1149,
548
  "step": 660
549
  },
550
  {
551
  "epoch": 1.82,
552
  "learning_rate": 1.793478260869565e-05,
553
- "loss": 0.0605,
554
  "step": 670
555
  },
556
  {
557
  "epoch": 1.85,
558
  "learning_rate": 1.5217391304347828e-05,
559
- "loss": 0.0761,
560
  "step": 680
561
  },
562
  {
563
  "epoch": 1.85,
564
- "eval_accuracy": 0.9754768392370572,
565
- "eval_loss": 0.062350884079933167,
566
- "eval_runtime": 17.6913,
567
- "eval_samples_per_second": 41.489,
568
- "eval_steps_per_second": 5.2,
569
  "step": 680
570
  },
571
  {
572
  "epoch": 1.88,
573
  "learning_rate": 1.25e-05,
574
- "loss": 0.1011,
575
  "step": 690
576
  },
577
  {
578
  "epoch": 1.9,
579
  "learning_rate": 9.782608695652175e-06,
580
- "loss": 0.0473,
581
  "step": 700
582
  },
583
  {
584
  "epoch": 1.93,
585
  "learning_rate": 7.065217391304347e-06,
586
- "loss": 0.0595,
587
  "step": 710
588
  },
589
  {
590
  "epoch": 1.96,
591
  "learning_rate": 4.347826086956522e-06,
592
- "loss": 0.0566,
593
  "step": 720
594
  },
595
  {
596
  "epoch": 1.96,
597
- "eval_accuracy": 0.9754768392370572,
598
- "eval_loss": 0.059235844761133194,
599
- "eval_runtime": 17.9656,
600
- "eval_samples_per_second": 40.856,
601
- "eval_steps_per_second": 5.121,
602
  "step": 720
603
  }
604
  ],
 
1
  {
2
+ "best_metric": 0.03899691626429558,
3
+ "best_model_checkpoint": "./vit-base-beans/checkpoint-720",
4
  "epoch": 1.9565217391304348,
5
  "global_step": 720,
6
  "is_hyper_param_search": false,
 
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 0.0001972826086956522,
13
+ "loss": 0.5574,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.05,
18
  "learning_rate": 0.00019456521739130434,
19
+ "loss": 0.3197,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.08,
24
  "learning_rate": 0.00019184782608695653,
25
+ "loss": 0.271,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.11,
30
  "learning_rate": 0.0001891304347826087,
31
+ "loss": 0.2484,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.11,
36
+ "eval_accuracy": 0.8801089918256131,
37
+ "eval_loss": 0.21790249645709991,
38
+ "eval_runtime": 16.4161,
39
+ "eval_samples_per_second": 44.712,
40
+ "eval_steps_per_second": 5.604,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 0.14,
45
  "learning_rate": 0.0001864130434782609,
46
+ "loss": 0.2372,
47
  "step": 50
48
  },
49
  {
50
  "epoch": 0.16,
51
  "learning_rate": 0.00018369565217391304,
52
+ "loss": 0.1945,
53
  "step": 60
54
  },
55
  {
56
  "epoch": 0.19,
57
  "learning_rate": 0.00018097826086956522,
58
+ "loss": 0.2186,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.22,
63
  "learning_rate": 0.0001782608695652174,
64
+ "loss": 0.3033,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.22,
69
+ "eval_accuracy": 0.8678474114441417,
70
+ "eval_loss": 0.28713682293891907,
71
+ "eval_runtime": 18.6159,
72
+ "eval_samples_per_second": 39.429,
73
+ "eval_steps_per_second": 4.942,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 0.24,
78
  "learning_rate": 0.00017554347826086956,
79
+ "loss": 0.2893,
80
  "step": 90
81
  },
82
  {
83
  "epoch": 0.27,
84
  "learning_rate": 0.00017282608695652174,
85
+ "loss": 0.3135,
86
  "step": 100
87
  },
88
  {
89
  "epoch": 0.3,
90
  "learning_rate": 0.00017010869565217392,
91
+ "loss": 0.2673,
92
  "step": 110
93
  },
94
  {
95
  "epoch": 0.33,
96
  "learning_rate": 0.0001673913043478261,
97
+ "loss": 0.2709,
98
  "step": 120
99
  },
100
  {
101
  "epoch": 0.33,
102
+ "eval_accuracy": 0.9073569482288828,
103
+ "eval_loss": 0.23233647644519806,
104
+ "eval_runtime": 16.7819,
105
+ "eval_samples_per_second": 43.738,
106
+ "eval_steps_per_second": 5.482,
107
  "step": 120
108
  },
109
  {
110
  "epoch": 0.35,
111
  "learning_rate": 0.00016467391304347828,
112
+ "loss": 0.2171,
113
  "step": 130
114
  },
115
  {
116
  "epoch": 0.38,
117
  "learning_rate": 0.00016195652173913046,
118
+ "loss": 0.2393,
119
  "step": 140
120
  },
121
  {
122
  "epoch": 0.41,
123
  "learning_rate": 0.00015923913043478262,
124
+ "loss": 0.1934,
125
  "step": 150
126
  },
127
  {
128
  "epoch": 0.43,
129
  "learning_rate": 0.0001565217391304348,
130
+ "loss": 0.1878,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 0.43,
135
+ "eval_accuracy": 0.9318801089918256,
136
+ "eval_loss": 0.1636728048324585,
137
+ "eval_runtime": 16.6111,
138
+ "eval_samples_per_second": 44.187,
139
+ "eval_steps_per_second": 5.538,
140
  "step": 160
141
  },
142
  {
143
  "epoch": 0.46,
144
  "learning_rate": 0.00015380434782608698,
145
+ "loss": 0.2186,
146
  "step": 170
147
  },
148
  {
149
  "epoch": 0.49,
150
  "learning_rate": 0.00015108695652173913,
151
+ "loss": 0.1952,
152
  "step": 180
153
  },
154
  {
155
  "epoch": 0.52,
156
  "learning_rate": 0.0001483695652173913,
157
+ "loss": 0.1546,
158
  "step": 190
159
  },
160
  {
161
  "epoch": 0.54,
162
  "learning_rate": 0.0001456521739130435,
163
+ "loss": 0.1655,
164
  "step": 200
165
  },
166
  {
167
  "epoch": 0.54,
168
+ "eval_accuracy": 0.8678474114441417,
169
+ "eval_loss": 0.24226485192775726,
170
+ "eval_runtime": 17.096,
171
+ "eval_samples_per_second": 42.934,
172
+ "eval_steps_per_second": 5.381,
173
  "step": 200
174
  },
175
  {
176
  "epoch": 0.57,
177
  "learning_rate": 0.00014293478260869567,
178
+ "loss": 0.1843,
179
  "step": 210
180
  },
181
  {
182
  "epoch": 0.6,
183
  "learning_rate": 0.00014021739130434783,
184
+ "loss": 0.12,
185
  "step": 220
186
  },
187
  {
188
  "epoch": 0.62,
189
  "learning_rate": 0.0001375,
190
+ "loss": 0.1686,
191
  "step": 230
192
  },
193
  {
194
  "epoch": 0.65,
195
  "learning_rate": 0.0001347826086956522,
196
+ "loss": 0.2393,
197
  "step": 240
198
  },
199
  {
200
  "epoch": 0.65,
201
+ "eval_accuracy": 0.94141689373297,
202
+ "eval_loss": 0.1487017571926117,
203
+ "eval_runtime": 16.7397,
204
+ "eval_samples_per_second": 43.848,
205
+ "eval_steps_per_second": 5.496,
206
  "step": 240
207
  },
208
  {
209
  "epoch": 0.68,
210
  "learning_rate": 0.00013206521739130434,
211
+ "loss": 0.2159,
212
  "step": 250
213
  },
214
  {
215
  "epoch": 0.71,
216
  "learning_rate": 0.00012934782608695652,
217
+ "loss": 0.1608,
218
  "step": 260
219
  },
220
  {
221
  "epoch": 0.73,
222
  "learning_rate": 0.0001266304347826087,
223
+ "loss": 0.1508,
224
  "step": 270
225
  },
226
  {
227
  "epoch": 0.76,
228
  "learning_rate": 0.00012391304347826086,
229
+ "loss": 0.1432,
230
  "step": 280
231
  },
232
  {
233
  "epoch": 0.76,
234
+ "eval_accuracy": 0.9209809264305178,
235
+ "eval_loss": 0.1679653823375702,
236
+ "eval_runtime": 19.1578,
237
+ "eval_samples_per_second": 38.313,
238
+ "eval_steps_per_second": 4.802,
239
  "step": 280
240
  },
241
  {
242
  "epoch": 0.79,
243
  "learning_rate": 0.00012119565217391304,
244
+ "loss": 0.1852,
245
  "step": 290
246
  },
247
  {
248
  "epoch": 0.82,
249
  "learning_rate": 0.00011847826086956522,
250
+ "loss": 0.1845,
251
  "step": 300
252
  },
253
  {
254
  "epoch": 0.84,
255
  "learning_rate": 0.00011576086956521739,
256
+ "loss": 0.1238,
257
  "step": 310
258
  },
259
  {
260
  "epoch": 0.87,
261
  "learning_rate": 0.00011304347826086956,
262
+ "loss": 0.0986,
263
  "step": 320
264
  },
265
  {
266
  "epoch": 0.87,
267
+ "eval_accuracy": 0.9604904632152589,
268
+ "eval_loss": 0.11554770916700363,
269
+ "eval_runtime": 16.799,
270
+ "eval_samples_per_second": 43.693,
271
+ "eval_steps_per_second": 5.477,
272
  "step": 320
273
  },
274
  {
275
  "epoch": 0.9,
276
  "learning_rate": 0.00011032608695652174,
277
+ "loss": 0.1781,
278
  "step": 330
279
  },
280
  {
281
  "epoch": 0.92,
282
  "learning_rate": 0.0001076086956521739,
283
+ "loss": 0.1229,
284
  "step": 340
285
  },
286
  {
287
  "epoch": 0.95,
288
  "learning_rate": 0.0001048913043478261,
289
+ "loss": 0.1238,
290
  "step": 350
291
  },
292
  {
293
  "epoch": 0.98,
294
  "learning_rate": 0.00010217391304347828,
295
+ "loss": 0.0729,
296
  "step": 360
297
  },
298
  {
299
  "epoch": 0.98,
300
+ "eval_accuracy": 0.9577656675749319,
301
+ "eval_loss": 0.10841722786426544,
302
+ "eval_runtime": 17.3667,
303
+ "eval_samples_per_second": 42.265,
304
+ "eval_steps_per_second": 5.297,
305
  "step": 360
306
  },
307
  {
308
  "epoch": 1.01,
309
  "learning_rate": 9.945652173913043e-05,
310
+ "loss": 0.1156,
311
  "step": 370
312
  },
313
  {
314
  "epoch": 1.03,
315
  "learning_rate": 9.673913043478261e-05,
316
+ "loss": 0.0934,
317
  "step": 380
318
  },
319
  {
320
  "epoch": 1.06,
321
  "learning_rate": 9.402173913043478e-05,
322
+ "loss": 0.082,
323
  "step": 390
324
  },
325
  {
326
  "epoch": 1.09,
327
  "learning_rate": 9.130434782608696e-05,
328
+ "loss": 0.0553,
329
  "step": 400
330
  },
331
  {
332
  "epoch": 1.09,
333
+ "eval_accuracy": 0.9632152588555858,
334
+ "eval_loss": 0.09444452077150345,
335
+ "eval_runtime": 16.7805,
336
+ "eval_samples_per_second": 43.741,
337
+ "eval_steps_per_second": 5.483,
338
  "step": 400
339
  },
340
  {
341
  "epoch": 1.11,
342
  "learning_rate": 8.858695652173914e-05,
343
+ "loss": 0.1067,
344
  "step": 410
345
  },
346
  {
347
  "epoch": 1.14,
348
  "learning_rate": 8.586956521739131e-05,
349
+ "loss": 0.0544,
350
  "step": 420
351
  },
352
  {
353
  "epoch": 1.17,
354
  "learning_rate": 8.315217391304349e-05,
355
+ "loss": 0.1047,
356
  "step": 430
357
  },
358
  {
359
  "epoch": 1.2,
360
  "learning_rate": 8.043478260869566e-05,
361
+ "loss": 0.0395,
362
  "step": 440
363
  },
364
  {
365
  "epoch": 1.2,
366
+ "eval_accuracy": 0.9673024523160763,
367
+ "eval_loss": 0.07385484129190445,
368
+ "eval_runtime": 17.3745,
369
+ "eval_samples_per_second": 42.246,
370
+ "eval_steps_per_second": 5.295,
371
  "step": 440
372
  },
373
  {
374
  "epoch": 1.22,
375
  "learning_rate": 7.771739130434783e-05,
376
+ "loss": 0.0566,
377
  "step": 450
378
  },
379
  {
380
  "epoch": 1.25,
381
  "learning_rate": 7.500000000000001e-05,
382
+ "loss": 0.0625,
383
  "step": 460
384
  },
385
  {
386
  "epoch": 1.28,
387
  "learning_rate": 7.228260869565217e-05,
388
+ "loss": 0.0961,
389
  "step": 470
390
  },
391
  {
392
  "epoch": 1.3,
393
  "learning_rate": 6.956521739130436e-05,
394
+ "loss": 0.0178,
395
  "step": 480
396
  },
397
  {
398
  "epoch": 1.3,
399
+ "eval_accuracy": 0.9673024523160763,
400
+ "eval_loss": 0.10838904976844788,
401
+ "eval_runtime": 16.8091,
402
+ "eval_samples_per_second": 43.667,
403
+ "eval_steps_per_second": 5.473,
404
  "step": 480
405
  },
406
  {
407
  "epoch": 1.33,
408
  "learning_rate": 6.684782608695652e-05,
409
+ "loss": 0.0641,
410
  "step": 490
411
  },
412
  {
413
  "epoch": 1.36,
414
  "learning_rate": 6.413043478260869e-05,
415
+ "loss": 0.0497,
416
  "step": 500
417
  },
418
  {
419
  "epoch": 1.39,
420
  "learning_rate": 6.141304347826087e-05,
421
+ "loss": 0.0364,
422
  "step": 510
423
  },
424
  {
425
  "epoch": 1.41,
426
  "learning_rate": 5.869565217391305e-05,
427
+ "loss": 0.0273,
428
  "step": 520
429
  },
430
  {
431
  "epoch": 1.41,
432
  "eval_accuracy": 0.9727520435967303,
433
+ "eval_loss": 0.07216904312372208,
434
+ "eval_runtime": 17.0234,
435
+ "eval_samples_per_second": 43.117,
436
+ "eval_steps_per_second": 5.404,
437
  "step": 520
438
  },
439
  {
440
  "epoch": 1.44,
441
  "learning_rate": 5.5978260869565226e-05,
442
+ "loss": 0.0558,
443
  "step": 530
444
  },
445
  {
446
  "epoch": 1.47,
447
  "learning_rate": 5.32608695652174e-05,
448
+ "loss": 0.0379,
449
  "step": 540
450
  },
451
  {
452
  "epoch": 1.49,
453
  "learning_rate": 5.054347826086957e-05,
454
+ "loss": 0.0569,
455
  "step": 550
456
  },
457
  {
458
  "epoch": 1.52,
459
  "learning_rate": 4.782608695652174e-05,
460
+ "loss": 0.0273,
461
  "step": 560
462
  },
463
  {
464
  "epoch": 1.52,
465
  "eval_accuracy": 0.9754768392370572,
466
+ "eval_loss": 0.0689365416765213,
467
+ "eval_runtime": 16.7035,
468
+ "eval_samples_per_second": 43.943,
469
+ "eval_steps_per_second": 5.508,
470
  "step": 560
471
  },
472
  {
473
  "epoch": 1.55,
474
  "learning_rate": 4.5108695652173916e-05,
475
+ "loss": 0.0087,
476
  "step": 570
477
  },
478
  {
479
  "epoch": 1.58,
480
  "learning_rate": 4.239130434782609e-05,
481
+ "loss": 0.0347,
482
  "step": 580
483
  },
484
  {
485
  "epoch": 1.6,
486
  "learning_rate": 3.9673913043478264e-05,
487
+ "loss": 0.0196,
488
  "step": 590
489
  },
490
  {
491
  "epoch": 1.63,
492
  "learning_rate": 3.695652173913043e-05,
493
+ "loss": 0.0271,
494
  "step": 600
495
  },
496
  {
497
  "epoch": 1.63,
498
+ "eval_accuracy": 0.9795640326975477,
499
+ "eval_loss": 0.07317620515823364,
500
+ "eval_runtime": 16.8063,
501
+ "eval_samples_per_second": 43.674,
502
+ "eval_steps_per_second": 5.474,
503
  "step": 600
504
  },
505
  {
506
  "epoch": 1.66,
507
  "learning_rate": 3.423913043478261e-05,
508
+ "loss": 0.0951,
509
  "step": 610
510
  },
511
  {
512
  "epoch": 1.68,
513
  "learning_rate": 3.152173913043479e-05,
514
+ "loss": 0.0726,
515
  "step": 620
516
  },
517
  {
518
  "epoch": 1.71,
519
  "learning_rate": 2.8804347826086957e-05,
520
+ "loss": 0.01,
521
  "step": 630
522
  },
523
  {
524
  "epoch": 1.74,
525
  "learning_rate": 2.608695652173913e-05,
526
+ "loss": 0.0294,
527
  "step": 640
528
  },
529
  {
530
  "epoch": 1.74,
531
+ "eval_accuracy": 0.9822888283378747,
532
+ "eval_loss": 0.057586103677749634,
533
+ "eval_runtime": 17.373,
534
+ "eval_samples_per_second": 42.249,
535
+ "eval_steps_per_second": 5.296,
536
  "step": 640
537
  },
538
  {
539
  "epoch": 1.77,
540
  "learning_rate": 2.3369565217391306e-05,
541
+ "loss": 0.017,
542
  "step": 650
543
  },
544
  {
545
  "epoch": 1.79,
546
  "learning_rate": 2.065217391304348e-05,
547
+ "loss": 0.0271,
548
  "step": 660
549
  },
550
  {
551
  "epoch": 1.82,
552
  "learning_rate": 1.793478260869565e-05,
553
+ "loss": 0.0572,
554
  "step": 670
555
  },
556
  {
557
  "epoch": 1.85,
558
  "learning_rate": 1.5217391304347828e-05,
559
+ "loss": 0.0449,
560
  "step": 680
561
  },
562
  {
563
  "epoch": 1.85,
564
+ "eval_accuracy": 0.9877384196185286,
565
+ "eval_loss": 0.04121558368206024,
566
+ "eval_runtime": 16.5738,
567
+ "eval_samples_per_second": 44.287,
568
+ "eval_steps_per_second": 5.551,
569
  "step": 680
570
  },
571
  {
572
  "epoch": 1.88,
573
  "learning_rate": 1.25e-05,
574
+ "loss": 0.0261,
575
  "step": 690
576
  },
577
  {
578
  "epoch": 1.9,
579
  "learning_rate": 9.782608695652175e-06,
580
+ "loss": 0.0091,
581
  "step": 700
582
  },
583
  {
584
  "epoch": 1.93,
585
  "learning_rate": 7.065217391304347e-06,
586
+ "loss": 0.0799,
587
  "step": 710
588
  },
589
  {
590
  "epoch": 1.96,
591
  "learning_rate": 4.347826086956522e-06,
592
+ "loss": 0.0076,
593
  "step": 720
594
  },
595
  {
596
  "epoch": 1.96,
597
+ "eval_accuracy": 0.9877384196185286,
598
+ "eval_loss": 0.03899691626429558,
599
+ "eval_runtime": 17.1545,
600
+ "eval_samples_per_second": 42.788,
601
+ "eval_steps_per_second": 5.363,
602
  "step": 720
603
  }
604
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94024797e7d1316e62c6bf965da1ed7e576bfdede5f093d36faf5554fa254f21
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d42765cdc82ba9c501fd08fe3051fcacc5260575abfef3749cf9ec8c08e6be
3
  size 3963