youngp5 commited on
Commit
5b80219
1 Parent(s): e2fa668

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Benign",
13
+ "1": "Malignant",
14
+ "2": "NoTumor"
15
+ },
16
+ "image_size": 224,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "Benign": "0",
21
+ "Malignant": "1",
22
+ "NoTumor": "2"
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "model_type": "vit",
26
+ "num_attention_heads": 12,
27
+ "num_channels": 3,
28
+ "num_hidden_layers": 12,
29
+ "patch_size": 16,
30
+ "problem_type": "single_label_classification",
31
+ "qkv_bias": true,
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.31.0"
34
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5167b230451a751d190361a5f1abe4137481252f49d580e8af0df9e5b4e6771
3
+ size 686525061
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bdb68787afb42af5a9e0105b9694fc8eea4e4778940b6791e5c2e0e043994b3
3
+ size 343271789
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab62043be50b93d4eb28964be2d945176db3d64fe73ddd052a7656ba9141c683
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7be74ea536e79eb53304c8ad7665b1808b95e8a71eefc3a101477593346ec95
3
+ size 627
trainer_state.json ADDED
@@ -0,0 +1,610 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.057097259908914566,
3
+ "best_model_checkpoint": "./vit-base-beans/checkpoint-480",
4
+ "epoch": 1.9565217391304348,
5
+ "global_step": 720,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.03,
12
+ "learning_rate": 0.0001972826086956522,
13
+ "loss": 0.6016,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.05,
18
+ "learning_rate": 0.00019456521739130434,
19
+ "loss": 0.168,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.08,
24
+ "learning_rate": 0.00019184782608695653,
25
+ "loss": 0.1568,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.11,
30
+ "learning_rate": 0.0001891304347826087,
31
+ "loss": 0.1165,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.11,
36
+ "eval_accuracy": 0.9754768392370572,
37
+ "eval_loss": 0.10377205908298492,
38
+ "eval_runtime": 18.7416,
39
+ "eval_samples_per_second": 39.164,
40
+ "eval_steps_per_second": 4.909,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 0.14,
45
+ "learning_rate": 0.0001864130434782609,
46
+ "loss": 0.0853,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 0.16,
51
+ "learning_rate": 0.00018369565217391304,
52
+ "loss": 0.179,
53
+ "step": 60
54
+ },
55
+ {
56
+ "epoch": 0.19,
57
+ "learning_rate": 0.00018097826086956522,
58
+ "loss": 0.0871,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.22,
63
+ "learning_rate": 0.0001782608695652174,
64
+ "loss": 0.0596,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.22,
69
+ "eval_accuracy": 0.9673024523160763,
70
+ "eval_loss": 0.1255735605955124,
71
+ "eval_runtime": 19.1765,
72
+ "eval_samples_per_second": 38.276,
73
+ "eval_steps_per_second": 4.798,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 0.24,
78
+ "learning_rate": 0.00017554347826086956,
79
+ "loss": 0.1279,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 0.27,
84
+ "learning_rate": 0.00017282608695652174,
85
+ "loss": 0.0814,
86
+ "step": 100
87
+ },
88
+ {
89
+ "epoch": 0.3,
90
+ "learning_rate": 0.00017010869565217392,
91
+ "loss": 0.0967,
92
+ "step": 110
93
+ },
94
+ {
95
+ "epoch": 0.33,
96
+ "learning_rate": 0.0001673913043478261,
97
+ "loss": 0.1317,
98
+ "step": 120
99
+ },
100
+ {
101
+ "epoch": 0.33,
102
+ "eval_accuracy": 0.9495912806539509,
103
+ "eval_loss": 0.18217410147190094,
104
+ "eval_runtime": 17.5926,
105
+ "eval_samples_per_second": 41.722,
106
+ "eval_steps_per_second": 5.229,
107
+ "step": 120
108
+ },
109
+ {
110
+ "epoch": 0.35,
111
+ "learning_rate": 0.00016467391304347828,
112
+ "loss": 0.0585,
113
+ "step": 130
114
+ },
115
+ {
116
+ "epoch": 0.38,
117
+ "learning_rate": 0.00016195652173913046,
118
+ "loss": 0.078,
119
+ "step": 140
120
+ },
121
+ {
122
+ "epoch": 0.41,
123
+ "learning_rate": 0.00015923913043478262,
124
+ "loss": 0.0794,
125
+ "step": 150
126
+ },
127
+ {
128
+ "epoch": 0.43,
129
+ "learning_rate": 0.0001565217391304348,
130
+ "loss": 0.0731,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 0.43,
135
+ "eval_accuracy": 0.9741144414168937,
136
+ "eval_loss": 0.11143568158149719,
137
+ "eval_runtime": 18.204,
138
+ "eval_samples_per_second": 40.321,
139
+ "eval_steps_per_second": 5.054,
140
+ "step": 160
141
+ },
142
+ {
143
+ "epoch": 0.46,
144
+ "learning_rate": 0.00015380434782608698,
145
+ "loss": 0.0842,
146
+ "step": 170
147
+ },
148
+ {
149
+ "epoch": 0.49,
150
+ "learning_rate": 0.00015108695652173913,
151
+ "loss": 0.0406,
152
+ "step": 180
153
+ },
154
+ {
155
+ "epoch": 0.52,
156
+ "learning_rate": 0.0001483695652173913,
157
+ "loss": 0.1171,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 0.54,
162
+ "learning_rate": 0.0001456521739130435,
163
+ "loss": 0.1041,
164
+ "step": 200
165
+ },
166
+ {
167
+ "epoch": 0.54,
168
+ "eval_accuracy": 0.9754768392370572,
169
+ "eval_loss": 0.08514755964279175,
170
+ "eval_runtime": 17.638,
171
+ "eval_samples_per_second": 41.615,
172
+ "eval_steps_per_second": 5.216,
173
+ "step": 200
174
+ },
175
+ {
176
+ "epoch": 0.57,
177
+ "learning_rate": 0.00014293478260869567,
178
+ "loss": 0.0823,
179
+ "step": 210
180
+ },
181
+ {
182
+ "epoch": 0.6,
183
+ "learning_rate": 0.00014021739130434783,
184
+ "loss": 0.0536,
185
+ "step": 220
186
+ },
187
+ {
188
+ "epoch": 0.62,
189
+ "learning_rate": 0.0001375,
190
+ "loss": 0.1171,
191
+ "step": 230
192
+ },
193
+ {
194
+ "epoch": 0.65,
195
+ "learning_rate": 0.0001347826086956522,
196
+ "loss": 0.1292,
197
+ "step": 240
198
+ },
199
+ {
200
+ "epoch": 0.65,
201
+ "eval_accuracy": 0.9645776566757494,
202
+ "eval_loss": 0.13422255218029022,
203
+ "eval_runtime": 17.8577,
204
+ "eval_samples_per_second": 41.103,
205
+ "eval_steps_per_second": 5.152,
206
+ "step": 240
207
+ },
208
+ {
209
+ "epoch": 0.68,
210
+ "learning_rate": 0.00013206521739130434,
211
+ "loss": 0.1483,
212
+ "step": 250
213
+ },
214
+ {
215
+ "epoch": 0.71,
216
+ "learning_rate": 0.00012934782608695652,
217
+ "loss": 0.1082,
218
+ "step": 260
219
+ },
220
+ {
221
+ "epoch": 0.73,
222
+ "learning_rate": 0.0001266304347826087,
223
+ "loss": 0.1206,
224
+ "step": 270
225
+ },
226
+ {
227
+ "epoch": 0.76,
228
+ "learning_rate": 0.00012391304347826086,
229
+ "loss": 0.1232,
230
+ "step": 280
231
+ },
232
+ {
233
+ "epoch": 0.76,
234
+ "eval_accuracy": 0.9741144414168937,
235
+ "eval_loss": 0.08246026188135147,
236
+ "eval_runtime": 17.6614,
237
+ "eval_samples_per_second": 41.56,
238
+ "eval_steps_per_second": 5.209,
239
+ "step": 280
240
+ },
241
+ {
242
+ "epoch": 0.79,
243
+ "learning_rate": 0.00012119565217391304,
244
+ "loss": 0.0745,
245
+ "step": 290
246
+ },
247
+ {
248
+ "epoch": 0.82,
249
+ "learning_rate": 0.00011847826086956522,
250
+ "loss": 0.1076,
251
+ "step": 300
252
+ },
253
+ {
254
+ "epoch": 0.84,
255
+ "learning_rate": 0.00011576086956521739,
256
+ "loss": 0.0651,
257
+ "step": 310
258
+ },
259
+ {
260
+ "epoch": 0.87,
261
+ "learning_rate": 0.00011304347826086956,
262
+ "loss": 0.0782,
263
+ "step": 320
264
+ },
265
+ {
266
+ "epoch": 0.87,
267
+ "eval_accuracy": 0.9741144414168937,
268
+ "eval_loss": 0.09356382489204407,
269
+ "eval_runtime": 17.6794,
270
+ "eval_samples_per_second": 41.517,
271
+ "eval_steps_per_second": 5.204,
272
+ "step": 320
273
+ },
274
+ {
275
+ "epoch": 0.9,
276
+ "learning_rate": 0.00011032608695652174,
277
+ "loss": 0.115,
278
+ "step": 330
279
+ },
280
+ {
281
+ "epoch": 0.92,
282
+ "learning_rate": 0.0001076086956521739,
283
+ "loss": 0.0635,
284
+ "step": 340
285
+ },
286
+ {
287
+ "epoch": 0.95,
288
+ "learning_rate": 0.0001048913043478261,
289
+ "loss": 0.1061,
290
+ "step": 350
291
+ },
292
+ {
293
+ "epoch": 0.98,
294
+ "learning_rate": 0.00010217391304347828,
295
+ "loss": 0.1171,
296
+ "step": 360
297
+ },
298
+ {
299
+ "epoch": 0.98,
300
+ "eval_accuracy": 0.9768392370572208,
301
+ "eval_loss": 0.06765041500329971,
302
+ "eval_runtime": 18.1237,
303
+ "eval_samples_per_second": 40.499,
304
+ "eval_steps_per_second": 5.076,
305
+ "step": 360
306
+ },
307
+ {
308
+ "epoch": 1.01,
309
+ "learning_rate": 9.945652173913043e-05,
310
+ "loss": 0.086,
311
+ "step": 370
312
+ },
313
+ {
314
+ "epoch": 1.03,
315
+ "learning_rate": 9.673913043478261e-05,
316
+ "loss": 0.0685,
317
+ "step": 380
318
+ },
319
+ {
320
+ "epoch": 1.06,
321
+ "learning_rate": 9.402173913043478e-05,
322
+ "loss": 0.052,
323
+ "step": 390
324
+ },
325
+ {
326
+ "epoch": 1.09,
327
+ "learning_rate": 9.130434782608696e-05,
328
+ "loss": 0.0549,
329
+ "step": 400
330
+ },
331
+ {
332
+ "epoch": 1.09,
333
+ "eval_accuracy": 0.9727520435967303,
334
+ "eval_loss": 0.06371203064918518,
335
+ "eval_runtime": 17.6078,
336
+ "eval_samples_per_second": 41.686,
337
+ "eval_steps_per_second": 5.225,
338
+ "step": 400
339
+ },
340
+ {
341
+ "epoch": 1.11,
342
+ "learning_rate": 8.858695652173914e-05,
343
+ "loss": 0.058,
344
+ "step": 410
345
+ },
346
+ {
347
+ "epoch": 1.14,
348
+ "learning_rate": 8.586956521739131e-05,
349
+ "loss": 0.1215,
350
+ "step": 420
351
+ },
352
+ {
353
+ "epoch": 1.17,
354
+ "learning_rate": 8.315217391304349e-05,
355
+ "loss": 0.1048,
356
+ "step": 430
357
+ },
358
+ {
359
+ "epoch": 1.2,
360
+ "learning_rate": 8.043478260869566e-05,
361
+ "loss": 0.0603,
362
+ "step": 440
363
+ },
364
+ {
365
+ "epoch": 1.2,
366
+ "eval_accuracy": 0.9768392370572208,
367
+ "eval_loss": 0.06182762607932091,
368
+ "eval_runtime": 18.5478,
369
+ "eval_samples_per_second": 39.574,
370
+ "eval_steps_per_second": 4.96,
371
+ "step": 440
372
+ },
373
+ {
374
+ "epoch": 1.22,
375
+ "learning_rate": 7.771739130434783e-05,
376
+ "loss": 0.0648,
377
+ "step": 450
378
+ },
379
+ {
380
+ "epoch": 1.25,
381
+ "learning_rate": 7.500000000000001e-05,
382
+ "loss": 0.0512,
383
+ "step": 460
384
+ },
385
+ {
386
+ "epoch": 1.28,
387
+ "learning_rate": 7.228260869565217e-05,
388
+ "loss": 0.0867,
389
+ "step": 470
390
+ },
391
+ {
392
+ "epoch": 1.3,
393
+ "learning_rate": 6.956521739130436e-05,
394
+ "loss": 0.049,
395
+ "step": 480
396
+ },
397
+ {
398
+ "epoch": 1.3,
399
+ "eval_accuracy": 0.9768392370572208,
400
+ "eval_loss": 0.057097259908914566,
401
+ "eval_runtime": 17.4967,
402
+ "eval_samples_per_second": 41.951,
403
+ "eval_steps_per_second": 5.258,
404
+ "step": 480
405
+ },
406
+ {
407
+ "epoch": 1.33,
408
+ "learning_rate": 6.684782608695652e-05,
409
+ "loss": 0.0389,
410
+ "step": 490
411
+ },
412
+ {
413
+ "epoch": 1.36,
414
+ "learning_rate": 6.413043478260869e-05,
415
+ "loss": 0.087,
416
+ "step": 500
417
+ },
418
+ {
419
+ "epoch": 1.39,
420
+ "learning_rate": 6.141304347826087e-05,
421
+ "loss": 0.0733,
422
+ "step": 510
423
+ },
424
+ {
425
+ "epoch": 1.41,
426
+ "learning_rate": 5.869565217391305e-05,
427
+ "loss": 0.0199,
428
+ "step": 520
429
+ },
430
+ {
431
+ "epoch": 1.41,
432
+ "eval_accuracy": 0.9727520435967303,
433
+ "eval_loss": 0.07243036478757858,
434
+ "eval_runtime": 17.9356,
435
+ "eval_samples_per_second": 40.924,
436
+ "eval_steps_per_second": 5.129,
437
+ "step": 520
438
+ },
439
+ {
440
+ "epoch": 1.44,
441
+ "learning_rate": 5.5978260869565226e-05,
442
+ "loss": 0.0629,
443
+ "step": 530
444
+ },
445
+ {
446
+ "epoch": 1.47,
447
+ "learning_rate": 5.32608695652174e-05,
448
+ "loss": 0.0752,
449
+ "step": 540
450
+ },
451
+ {
452
+ "epoch": 1.49,
453
+ "learning_rate": 5.054347826086957e-05,
454
+ "loss": 0.0894,
455
+ "step": 550
456
+ },
457
+ {
458
+ "epoch": 1.52,
459
+ "learning_rate": 4.782608695652174e-05,
460
+ "loss": 0.0787,
461
+ "step": 560
462
+ },
463
+ {
464
+ "epoch": 1.52,
465
+ "eval_accuracy": 0.9754768392370572,
466
+ "eval_loss": 0.06181642785668373,
467
+ "eval_runtime": 17.5051,
468
+ "eval_samples_per_second": 41.931,
469
+ "eval_steps_per_second": 5.256,
470
+ "step": 560
471
+ },
472
+ {
473
+ "epoch": 1.55,
474
+ "learning_rate": 4.5108695652173916e-05,
475
+ "loss": 0.1088,
476
+ "step": 570
477
+ },
478
+ {
479
+ "epoch": 1.58,
480
+ "learning_rate": 4.239130434782609e-05,
481
+ "loss": 0.0586,
482
+ "step": 580
483
+ },
484
+ {
485
+ "epoch": 1.6,
486
+ "learning_rate": 3.9673913043478264e-05,
487
+ "loss": 0.0972,
488
+ "step": 590
489
+ },
490
+ {
491
+ "epoch": 1.63,
492
+ "learning_rate": 3.695652173913043e-05,
493
+ "loss": 0.049,
494
+ "step": 600
495
+ },
496
+ {
497
+ "epoch": 1.63,
498
+ "eval_accuracy": 0.9754768392370572,
499
+ "eval_loss": 0.058572325855493546,
500
+ "eval_runtime": 19.2109,
501
+ "eval_samples_per_second": 38.207,
502
+ "eval_steps_per_second": 4.789,
503
+ "step": 600
504
+ },
505
+ {
506
+ "epoch": 1.66,
507
+ "learning_rate": 3.423913043478261e-05,
508
+ "loss": 0.0505,
509
+ "step": 610
510
+ },
511
+ {
512
+ "epoch": 1.68,
513
+ "learning_rate": 3.152173913043479e-05,
514
+ "loss": 0.0226,
515
+ "step": 620
516
+ },
517
+ {
518
+ "epoch": 1.71,
519
+ "learning_rate": 2.8804347826086957e-05,
520
+ "loss": 0.0754,
521
+ "step": 630
522
+ },
523
+ {
524
+ "epoch": 1.74,
525
+ "learning_rate": 2.608695652173913e-05,
526
+ "loss": 0.0356,
527
+ "step": 640
528
+ },
529
+ {
530
+ "epoch": 1.74,
531
+ "eval_accuracy": 0.9754768392370572,
532
+ "eval_loss": 0.05890597403049469,
533
+ "eval_runtime": 17.4571,
534
+ "eval_samples_per_second": 42.046,
535
+ "eval_steps_per_second": 5.27,
536
+ "step": 640
537
+ },
538
+ {
539
+ "epoch": 1.77,
540
+ "learning_rate": 2.3369565217391306e-05,
541
+ "loss": 0.0261,
542
+ "step": 650
543
+ },
544
+ {
545
+ "epoch": 1.79,
546
+ "learning_rate": 2.065217391304348e-05,
547
+ "loss": 0.1149,
548
+ "step": 660
549
+ },
550
+ {
551
+ "epoch": 1.82,
552
+ "learning_rate": 1.793478260869565e-05,
553
+ "loss": 0.0605,
554
+ "step": 670
555
+ },
556
+ {
557
+ "epoch": 1.85,
558
+ "learning_rate": 1.5217391304347828e-05,
559
+ "loss": 0.0761,
560
+ "step": 680
561
+ },
562
+ {
563
+ "epoch": 1.85,
564
+ "eval_accuracy": 0.9754768392370572,
565
+ "eval_loss": 0.062350884079933167,
566
+ "eval_runtime": 17.6913,
567
+ "eval_samples_per_second": 41.489,
568
+ "eval_steps_per_second": 5.2,
569
+ "step": 680
570
+ },
571
+ {
572
+ "epoch": 1.88,
573
+ "learning_rate": 1.25e-05,
574
+ "loss": 0.1011,
575
+ "step": 690
576
+ },
577
+ {
578
+ "epoch": 1.9,
579
+ "learning_rate": 9.782608695652175e-06,
580
+ "loss": 0.0473,
581
+ "step": 700
582
+ },
583
+ {
584
+ "epoch": 1.93,
585
+ "learning_rate": 7.065217391304347e-06,
586
+ "loss": 0.0595,
587
+ "step": 710
588
+ },
589
+ {
590
+ "epoch": 1.96,
591
+ "learning_rate": 4.347826086956522e-06,
592
+ "loss": 0.0566,
593
+ "step": 720
594
+ },
595
+ {
596
+ "epoch": 1.96,
597
+ "eval_accuracy": 0.9754768392370572,
598
+ "eval_loss": 0.059235844761133194,
599
+ "eval_runtime": 17.9656,
600
+ "eval_samples_per_second": 40.856,
601
+ "eval_steps_per_second": 5.121,
602
+ "step": 720
603
+ }
604
+ ],
605
+ "max_steps": 736,
606
+ "num_train_epochs": 2,
607
+ "total_flos": 8.915533311547699e+17,
608
+ "trial_name": null,
609
+ "trial_params": null
610
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94024797e7d1316e62c6bf965da1ed7e576bfdede5f093d36faf5554fa254f21
3
+ size 3963