|
{ |
|
"best_metric": 0.7926267281105991, |
|
"best_model_checkpoint": "videomae-base-finetuned-subset-0401/checkpoint-896", |
|
"epoch": 49.01117117117117, |
|
"eval_steps": 500, |
|
"global_step": 2775, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7985611510791366e-06, |
|
"loss": 1.6857, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.5971223021582732e-06, |
|
"loss": 1.6911, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.3956834532374105e-06, |
|
"loss": 1.6168, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.1942446043165465e-06, |
|
"loss": 1.6689, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.992805755395683e-06, |
|
"loss": 1.6048, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.08294930875576037, |
|
"eval_loss": 1.6212950944900513, |
|
"eval_runtime": 176.5128, |
|
"eval_samples_per_second": 1.229, |
|
"eval_steps_per_second": 0.159, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0791366906474821e-05, |
|
"loss": 1.6118, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.2589928057553957e-05, |
|
"loss": 1.6008, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.4388489208633093e-05, |
|
"loss": 1.5996, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.618705035971223e-05, |
|
"loss": 1.6264, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.7985611510791367e-05, |
|
"loss": 1.6071, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.9784172661870504e-05, |
|
"loss": 1.5891, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.28110599078341014, |
|
"eval_loss": 1.5230350494384766, |
|
"eval_runtime": 169.5488, |
|
"eval_samples_per_second": 1.28, |
|
"eval_steps_per_second": 0.165, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.1582733812949642e-05, |
|
"loss": 1.5625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.3381294964028776e-05, |
|
"loss": 1.5854, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.5179856115107914e-05, |
|
"loss": 1.6007, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.697841726618705e-05, |
|
"loss": 1.5949, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.8776978417266186e-05, |
|
"loss": 1.4797, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.19815668202764977, |
|
"eval_loss": 1.6437386274337769, |
|
"eval_runtime": 169.6444, |
|
"eval_samples_per_second": 1.279, |
|
"eval_steps_per_second": 0.165, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.0575539568345324e-05, |
|
"loss": 1.5867, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.237410071942446e-05, |
|
"loss": 1.4592, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.41726618705036e-05, |
|
"loss": 1.3124, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.597122302158273e-05, |
|
"loss": 1.3075, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.776978417266187e-05, |
|
"loss": 1.1932, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.956834532374101e-05, |
|
"loss": 1.3999, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_accuracy": 0.7465437788018433, |
|
"eval_loss": 0.9262827634811401, |
|
"eval_runtime": 168.7616, |
|
"eval_samples_per_second": 1.286, |
|
"eval_steps_per_second": 0.166, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.136690647482014e-05, |
|
"loss": 1.2996, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.3165467625899284e-05, |
|
"loss": 1.313, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.496402877697842e-05, |
|
"loss": 1.2644, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.676258992805755e-05, |
|
"loss": 1.1732, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.8561151079136694e-05, |
|
"loss": 1.125, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.9959951942330803e-05, |
|
"loss": 1.0917, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_accuracy": 0.4930875576036866, |
|
"eval_loss": 1.2307826280593872, |
|
"eval_runtime": 169.9213, |
|
"eval_samples_per_second": 1.277, |
|
"eval_steps_per_second": 0.165, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.975971165398479e-05, |
|
"loss": 1.2046, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.955947136563877e-05, |
|
"loss": 1.2355, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.935923107729276e-05, |
|
"loss": 1.3358, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.915899078894674e-05, |
|
"loss": 1.1835, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.895875050060073e-05, |
|
"loss": 1.238, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_accuracy": 0.6589861751152074, |
|
"eval_loss": 0.940632700920105, |
|
"eval_runtime": 168.194, |
|
"eval_samples_per_second": 1.29, |
|
"eval_steps_per_second": 0.166, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.875851021225471e-05, |
|
"loss": 1.3625, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.8558269923908696e-05, |
|
"loss": 1.2342, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.835802963556268e-05, |
|
"loss": 1.0534, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.8157789347216665e-05, |
|
"loss": 1.1462, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.795754905887065e-05, |
|
"loss": 1.0197, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.7757308770524635e-05, |
|
"loss": 1.1525, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_accuracy": 0.7050691244239631, |
|
"eval_loss": 0.8809296488761902, |
|
"eval_runtime": 168.4512, |
|
"eval_samples_per_second": 1.288, |
|
"eval_steps_per_second": 0.166, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.755706848217862e-05, |
|
"loss": 1.0159, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.7356828193832604e-05, |
|
"loss": 0.9716, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.715658790548659e-05, |
|
"loss": 0.9998, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.6956347617140574e-05, |
|
"loss": 1.1153, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 4.675610732879456e-05, |
|
"loss": 1.0806, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_accuracy": 0.5944700460829493, |
|
"eval_loss": 1.0089017152786255, |
|
"eval_runtime": 171.2435, |
|
"eval_samples_per_second": 1.267, |
|
"eval_steps_per_second": 0.164, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.655586704044854e-05, |
|
"loss": 1.1055, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.635562675210253e-05, |
|
"loss": 0.9111, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 4.615538646375651e-05, |
|
"loss": 1.0937, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 4.59551461754105e-05, |
|
"loss": 1.1375, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 4.575490588706448e-05, |
|
"loss": 0.8357, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 4.5554665598718466e-05, |
|
"loss": 0.8483, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_accuracy": 0.5852534562211982, |
|
"eval_loss": 0.9700098037719727, |
|
"eval_runtime": 171.1205, |
|
"eval_samples_per_second": 1.268, |
|
"eval_steps_per_second": 0.164, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.535442531037245e-05, |
|
"loss": 0.9293, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.5154185022026436e-05, |
|
"loss": 1.1123, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.495394473368042e-05, |
|
"loss": 1.0562, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.4753704445334405e-05, |
|
"loss": 0.8454, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.455346415698839e-05, |
|
"loss": 0.8096, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.4353223868642374e-05, |
|
"loss": 0.992, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"eval_accuracy": 0.48847926267281105, |
|
"eval_loss": 1.1879829168319702, |
|
"eval_runtime": 168.7686, |
|
"eval_samples_per_second": 1.286, |
|
"eval_steps_per_second": 0.166, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.415298358029636e-05, |
|
"loss": 1.0682, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.3952743291950344e-05, |
|
"loss": 0.954, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.375250300360433e-05, |
|
"loss": 0.9788, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.355226271525831e-05, |
|
"loss": 0.975, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 4.33520224269123e-05, |
|
"loss": 0.862, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"eval_accuracy": 0.7511520737327189, |
|
"eval_loss": 0.7174272537231445, |
|
"eval_runtime": 168.7207, |
|
"eval_samples_per_second": 1.286, |
|
"eval_steps_per_second": 0.166, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.315178213856628e-05, |
|
"loss": 0.7987, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.295154185022027e-05, |
|
"loss": 0.7351, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.275130156187425e-05, |
|
"loss": 0.8315, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.2551061273528236e-05, |
|
"loss": 0.92, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 4.235082098518222e-05, |
|
"loss": 0.6532, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 4.2150580696836206e-05, |
|
"loss": 1.0694, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 0.8598370552062988, |
|
"eval_runtime": 168.9431, |
|
"eval_samples_per_second": 1.284, |
|
"eval_steps_per_second": 0.166, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.195034040849019e-05, |
|
"loss": 0.7898, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.1750100120144175e-05, |
|
"loss": 0.8624, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.154985983179816e-05, |
|
"loss": 0.9415, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.1349619543452144e-05, |
|
"loss": 0.806, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 4.114937925510613e-05, |
|
"loss": 0.8885, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"eval_accuracy": 0.7096774193548387, |
|
"eval_loss": 0.8289774656295776, |
|
"eval_runtime": 169.3457, |
|
"eval_samples_per_second": 1.281, |
|
"eval_steps_per_second": 0.165, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 4.0949138966760114e-05, |
|
"loss": 0.8971, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 4.07488986784141e-05, |
|
"loss": 0.752, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 4.054865839006808e-05, |
|
"loss": 0.9724, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 4.034841810172207e-05, |
|
"loss": 0.9637, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 4.014817781337605e-05, |
|
"loss": 1.0046, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 3.994793752503004e-05, |
|
"loss": 0.8965, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 0.8304111361503601, |
|
"eval_runtime": 171.3216, |
|
"eval_samples_per_second": 1.267, |
|
"eval_steps_per_second": 0.163, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.974769723668402e-05, |
|
"loss": 0.9904, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 3.9547456948338006e-05, |
|
"loss": 0.9846, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 3.934721665999199e-05, |
|
"loss": 0.9145, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 3.9146976371645976e-05, |
|
"loss": 0.7486, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 3.894673608329996e-05, |
|
"loss": 0.7721, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 3.8746495794953945e-05, |
|
"loss": 0.7371, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"eval_accuracy": 0.7695852534562212, |
|
"eval_loss": 0.7009211778640747, |
|
"eval_runtime": 168.9295, |
|
"eval_samples_per_second": 1.285, |
|
"eval_steps_per_second": 0.166, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 3.854625550660793e-05, |
|
"loss": 0.923, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 3.8346015218261915e-05, |
|
"loss": 0.9008, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 3.81457749299159e-05, |
|
"loss": 0.7429, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 3.7945534641569884e-05, |
|
"loss": 0.6912, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 3.774529435322387e-05, |
|
"loss": 0.6872, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"eval_accuracy": 0.7926267281105991, |
|
"eval_loss": 0.6768301129341125, |
|
"eval_runtime": 170.318, |
|
"eval_samples_per_second": 1.274, |
|
"eval_steps_per_second": 0.164, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3.754505406487785e-05, |
|
"loss": 0.636, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 3.734481377653184e-05, |
|
"loss": 0.7347, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 3.714457348818582e-05, |
|
"loss": 0.7698, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 3.694433319983981e-05, |
|
"loss": 0.7495, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 3.674409291149379e-05, |
|
"loss": 0.725, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 3.6543852623147777e-05, |
|
"loss": 0.6022, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"eval_accuracy": 0.7373271889400922, |
|
"eval_loss": 0.7512706518173218, |
|
"eval_runtime": 168.81, |
|
"eval_samples_per_second": 1.285, |
|
"eval_steps_per_second": 0.166, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.634361233480176e-05, |
|
"loss": 0.8774, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 3.6143372046455746e-05, |
|
"loss": 0.6907, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 3.594313175810973e-05, |
|
"loss": 0.6035, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 3.5742891469763715e-05, |
|
"loss": 0.7363, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"learning_rate": 3.55426511814177e-05, |
|
"loss": 0.9308, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"eval_accuracy": 0.7096774193548387, |
|
"eval_loss": 0.8054620623588562, |
|
"eval_runtime": 169.493, |
|
"eval_samples_per_second": 1.28, |
|
"eval_steps_per_second": 0.165, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.5342410893071685e-05, |
|
"loss": 0.73, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.514217060472567e-05, |
|
"loss": 0.9107, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 3.4941930316379654e-05, |
|
"loss": 0.739, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 3.474169002803364e-05, |
|
"loss": 0.8472, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 3.454144973968762e-05, |
|
"loss": 0.7395, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 3.434120945134161e-05, |
|
"loss": 0.4456, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"eval_accuracy": 0.6728110599078341, |
|
"eval_loss": 0.7876001000404358, |
|
"eval_runtime": 172.0864, |
|
"eval_samples_per_second": 1.261, |
|
"eval_steps_per_second": 0.163, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 3.414096916299559e-05, |
|
"loss": 0.581, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 3.394072887464958e-05, |
|
"loss": 1.0824, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 3.374048858630356e-05, |
|
"loss": 0.6623, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 3.354024829795755e-05, |
|
"loss": 0.6078, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 3.334000800961153e-05, |
|
"loss": 0.6867, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 3.3139767721265516e-05, |
|
"loss": 0.6802, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"eval_accuracy": 0.7235023041474654, |
|
"eval_loss": 0.722358763217926, |
|
"eval_runtime": 170.3701, |
|
"eval_samples_per_second": 1.274, |
|
"eval_steps_per_second": 0.164, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 3.29395274329195e-05, |
|
"loss": 0.6218, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 3.2739287144573485e-05, |
|
"loss": 0.6368, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 3.253904685622747e-05, |
|
"loss": 0.7407, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 3.2338806567881455e-05, |
|
"loss": 0.6189, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"learning_rate": 3.213856627953544e-05, |
|
"loss": 0.7154, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"eval_accuracy": 0.7050691244239631, |
|
"eval_loss": 0.7434002161026001, |
|
"eval_runtime": 168.861, |
|
"eval_samples_per_second": 1.285, |
|
"eval_steps_per_second": 0.166, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 3.1938325991189424e-05, |
|
"loss": 0.6084, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 3.173808570284341e-05, |
|
"loss": 0.656, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 3.1537845414497393e-05, |
|
"loss": 0.7274, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 3.133760512615138e-05, |
|
"loss": 0.5818, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"learning_rate": 3.113736483780536e-05, |
|
"loss": 0.8351, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"learning_rate": 3.0937124549459354e-05, |
|
"loss": 0.503, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"eval_accuracy": 0.695852534562212, |
|
"eval_loss": 0.8345744609832764, |
|
"eval_runtime": 168.411, |
|
"eval_samples_per_second": 1.289, |
|
"eval_steps_per_second": 0.166, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 3.073688426111334e-05, |
|
"loss": 0.5257, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 3.0536643972767324e-05, |
|
"loss": 0.511, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 3.0336403684421305e-05, |
|
"loss": 0.6193, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 3.013616339607529e-05, |
|
"loss": 0.6704, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"learning_rate": 2.9935923107729274e-05, |
|
"loss": 0.7203, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"eval_accuracy": 0.5990783410138248, |
|
"eval_loss": 0.9694386720657349, |
|
"eval_runtime": 169.6323, |
|
"eval_samples_per_second": 1.279, |
|
"eval_steps_per_second": 0.165, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 2.973568281938326e-05, |
|
"loss": 0.605, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 2.9535442531037244e-05, |
|
"loss": 0.6581, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 2.9335202242691228e-05, |
|
"loss": 0.5423, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 2.9134961954345213e-05, |
|
"loss": 0.6137, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"learning_rate": 2.8934721665999198e-05, |
|
"loss": 0.562, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"learning_rate": 2.8734481377653182e-05, |
|
"loss": 0.6799, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"eval_accuracy": 0.7695852534562212, |
|
"eval_loss": 0.647365152835846, |
|
"eval_runtime": 168.7434, |
|
"eval_samples_per_second": 1.286, |
|
"eval_steps_per_second": 0.166, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 2.8534241089307167e-05, |
|
"loss": 0.5556, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 2.833400080096115e-05, |
|
"loss": 0.7249, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 2.8133760512615136e-05, |
|
"loss": 0.563, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 2.793352022426912e-05, |
|
"loss": 0.6947, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 2.7733279935923106e-05, |
|
"loss": 0.8462, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 2.753303964757709e-05, |
|
"loss": 0.5802, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"eval_accuracy": 0.6359447004608295, |
|
"eval_loss": 0.957251787185669, |
|
"eval_runtime": 168.4758, |
|
"eval_samples_per_second": 1.288, |
|
"eval_steps_per_second": 0.166, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 2.7332799359231075e-05, |
|
"loss": 0.5186, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 2.713255907088506e-05, |
|
"loss": 0.5462, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 2.6932318782539044e-05, |
|
"loss": 0.6317, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 2.673207849419303e-05, |
|
"loss": 0.5265, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"learning_rate": 2.6531838205847014e-05, |
|
"loss": 0.7047, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"eval_accuracy": 0.695852534562212, |
|
"eval_loss": 0.9120410680770874, |
|
"eval_runtime": 168.5578, |
|
"eval_samples_per_second": 1.287, |
|
"eval_steps_per_second": 0.166, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 2.6331597917501e-05, |
|
"loss": 0.5364, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 2.613135762915499e-05, |
|
"loss": 0.5322, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 2.5931117340808974e-05, |
|
"loss": 0.897, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 2.573087705246296e-05, |
|
"loss": 0.4983, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"learning_rate": 2.5530636764116944e-05, |
|
"loss": 0.7582, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"learning_rate": 2.533039647577093e-05, |
|
"loss": 0.6701, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"eval_accuracy": 0.5852534562211982, |
|
"eval_loss": 1.1690046787261963, |
|
"eval_runtime": 169.2732, |
|
"eval_samples_per_second": 1.282, |
|
"eval_steps_per_second": 0.165, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 2.5130156187424913e-05, |
|
"loss": 0.4735, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 2.4929915899078894e-05, |
|
"loss": 0.6383, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 2.472967561073288e-05, |
|
"loss": 0.7644, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 2.4529435322386864e-05, |
|
"loss": 0.6395, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"learning_rate": 2.432919503404085e-05, |
|
"loss": 0.5514, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"eval_accuracy": 0.6866359447004609, |
|
"eval_loss": 0.9173929691314697, |
|
"eval_runtime": 169.8137, |
|
"eval_samples_per_second": 1.278, |
|
"eval_steps_per_second": 0.165, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2.4128954745694833e-05, |
|
"loss": 0.5936, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2.3928714457348818e-05, |
|
"loss": 0.4936, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.3728474169002803e-05, |
|
"loss": 0.6134, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.3528233880656787e-05, |
|
"loss": 0.654, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"learning_rate": 2.3327993592310772e-05, |
|
"loss": 0.6997, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"learning_rate": 2.3127753303964757e-05, |
|
"loss": 0.538, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"eval_accuracy": 0.6866359447004609, |
|
"eval_loss": 0.854290246963501, |
|
"eval_runtime": 171.1701, |
|
"eval_samples_per_second": 1.268, |
|
"eval_steps_per_second": 0.164, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 2.292751301561874e-05, |
|
"loss": 0.6431, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 2.272727272727273e-05, |
|
"loss": 0.6269, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 2.2527032438926714e-05, |
|
"loss": 0.6239, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 2.23267921505807e-05, |
|
"loss": 0.5901, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"learning_rate": 2.2126551862234683e-05, |
|
"loss": 0.5673, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"learning_rate": 2.1926311573888668e-05, |
|
"loss": 0.7226, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"eval_accuracy": 0.7465437788018433, |
|
"eval_loss": 0.7773527503013611, |
|
"eval_runtime": 170.9184, |
|
"eval_samples_per_second": 1.27, |
|
"eval_steps_per_second": 0.164, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2.1726071285542653e-05, |
|
"loss": 0.5368, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 2.1525830997196637e-05, |
|
"loss": 0.6407, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 2.1325590708850622e-05, |
|
"loss": 0.6071, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 2.1125350420504607e-05, |
|
"loss": 0.587, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 30.02, |
|
"learning_rate": 2.092511013215859e-05, |
|
"loss": 0.4459, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 30.02, |
|
"eval_accuracy": 0.6359447004608295, |
|
"eval_loss": 0.9134915471076965, |
|
"eval_runtime": 169.6863, |
|
"eval_samples_per_second": 1.279, |
|
"eval_steps_per_second": 0.165, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 2.0724869843812576e-05, |
|
"loss": 0.6281, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 2.052462955546656e-05, |
|
"loss": 0.5999, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 2.0324389267120545e-05, |
|
"loss": 0.4052, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 2.012414897877453e-05, |
|
"loss": 0.7613, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 31.02, |
|
"learning_rate": 1.9923908690428515e-05, |
|
"loss": 0.6318, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 31.02, |
|
"learning_rate": 1.9723668402082503e-05, |
|
"loss": 0.3905, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 31.02, |
|
"eval_accuracy": 0.6728110599078341, |
|
"eval_loss": 0.8585994243621826, |
|
"eval_runtime": 165.9142, |
|
"eval_samples_per_second": 1.308, |
|
"eval_steps_per_second": 0.169, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 1.9523428113736487e-05, |
|
"loss": 0.5, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 1.9323187825390472e-05, |
|
"loss": 0.4816, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 1.9122947537044457e-05, |
|
"loss": 0.4979, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 1.892270724869844e-05, |
|
"loss": 0.6203, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 32.02, |
|
"learning_rate": 1.8722466960352426e-05, |
|
"loss": 0.7071, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 32.02, |
|
"eval_accuracy": 0.7327188940092166, |
|
"eval_loss": 0.7919384837150574, |
|
"eval_runtime": 171.3033, |
|
"eval_samples_per_second": 1.267, |
|
"eval_steps_per_second": 0.163, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 1.852222667200641e-05, |
|
"loss": 0.5632, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 1.8321986383660396e-05, |
|
"loss": 0.6323, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 1.812174609531438e-05, |
|
"loss": 0.6151, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 1.7921505806968365e-05, |
|
"loss": 0.4412, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 1.772126551862235e-05, |
|
"loss": 0.4863, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 1.7521025230276334e-05, |
|
"loss": 0.4983, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"eval_accuracy": 0.7511520737327189, |
|
"eval_loss": 0.7506622672080994, |
|
"eval_runtime": 169.3822, |
|
"eval_samples_per_second": 1.281, |
|
"eval_steps_per_second": 0.165, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 1.732078494193032e-05, |
|
"loss": 0.4416, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 1.7120544653584304e-05, |
|
"loss": 0.5567, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 1.6920304365238288e-05, |
|
"loss": 0.3794, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 1.6720064076892273e-05, |
|
"loss": 0.5672, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"learning_rate": 1.6519823788546258e-05, |
|
"loss": 0.4093, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"learning_rate": 1.6319583500200242e-05, |
|
"loss": 0.5654, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 0.767917275428772, |
|
"eval_runtime": 173.7976, |
|
"eval_samples_per_second": 1.249, |
|
"eval_steps_per_second": 0.161, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 1.6119343211854227e-05, |
|
"loss": 0.3909, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 1.591910292350821e-05, |
|
"loss": 0.5576, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 1.5718862635162196e-05, |
|
"loss": 0.4247, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 1.551862234681618e-05, |
|
"loss": 0.747, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 35.02, |
|
"learning_rate": 1.5318382058470166e-05, |
|
"loss": 0.5569, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 35.02, |
|
"eval_accuracy": 0.7096774193548387, |
|
"eval_loss": 0.8438030481338501, |
|
"eval_runtime": 168.1341, |
|
"eval_samples_per_second": 1.291, |
|
"eval_steps_per_second": 0.167, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 1.511814177012415e-05, |
|
"loss": 0.5796, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 1.4917901481778135e-05, |
|
"loss": 0.6004, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 1.471766119343212e-05, |
|
"loss": 0.4482, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 1.4517420905086104e-05, |
|
"loss": 0.593, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 36.02, |
|
"learning_rate": 1.4317180616740089e-05, |
|
"loss": 0.6642, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 36.02, |
|
"learning_rate": 1.4116940328394074e-05, |
|
"loss": 0.3998, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 36.02, |
|
"eval_accuracy": 0.7188940092165899, |
|
"eval_loss": 0.8691464066505432, |
|
"eval_runtime": 171.2911, |
|
"eval_samples_per_second": 1.267, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2072 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 1.3916700040048058e-05, |
|
"loss": 0.3557, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 1.3716459751702043e-05, |
|
"loss": 0.3628, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 1.3516219463356028e-05, |
|
"loss": 0.335, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 1.3315979175010012e-05, |
|
"loss": 0.6813, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 37.02, |
|
"learning_rate": 1.3115738886663997e-05, |
|
"loss": 0.5341, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 37.02, |
|
"eval_accuracy": 0.7603686635944701, |
|
"eval_loss": 0.8056049346923828, |
|
"eval_runtime": 172.8523, |
|
"eval_samples_per_second": 1.255, |
|
"eval_steps_per_second": 0.162, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 1.2915498598317982e-05, |
|
"loss": 0.4075, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 1.2715258309971966e-05, |
|
"loss": 0.5339, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 1.2515018021625951e-05, |
|
"loss": 0.4681, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 1.2314777733279936e-05, |
|
"loss": 0.4725, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"learning_rate": 1.211453744493392e-05, |
|
"loss": 0.5588, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"learning_rate": 1.1914297156587905e-05, |
|
"loss": 0.4024, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"eval_accuracy": 0.7880184331797235, |
|
"eval_loss": 0.7070650458335876, |
|
"eval_runtime": 170.228, |
|
"eval_samples_per_second": 1.275, |
|
"eval_steps_per_second": 0.164, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 1.171405686824189e-05, |
|
"loss": 0.4122, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 1.1513816579895874e-05, |
|
"loss": 0.3389, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 1.131357629154986e-05, |
|
"loss": 0.4993, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 1.1113336003203845e-05, |
|
"loss": 0.5869, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"learning_rate": 1.091309571485783e-05, |
|
"loss": 0.7077, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"learning_rate": 1.0712855426511815e-05, |
|
"loss": 0.5011, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"eval_accuracy": 0.7004608294930875, |
|
"eval_loss": 0.882685661315918, |
|
"eval_runtime": 169.5666, |
|
"eval_samples_per_second": 1.28, |
|
"eval_steps_per_second": 0.165, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 1.05126151381658e-05, |
|
"loss": 0.443, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 1.0312374849819784e-05, |
|
"loss": 0.5292, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 1.0112134561473769e-05, |
|
"loss": 0.4058, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 9.911894273127754e-06, |
|
"loss": 0.5513, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 40.02, |
|
"learning_rate": 9.71165398478174e-06, |
|
"loss": 0.5857, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 40.02, |
|
"eval_accuracy": 0.7096774193548387, |
|
"eval_loss": 0.8525260090827942, |
|
"eval_runtime": 169.961, |
|
"eval_samples_per_second": 1.277, |
|
"eval_steps_per_second": 0.165, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 9.511413696435725e-06, |
|
"loss": 0.6831, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 9.31117340808971e-06, |
|
"loss": 0.4212, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 9.110933119743694e-06, |
|
"loss": 0.4343, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 8.910692831397679e-06, |
|
"loss": 0.3912, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 41.02, |
|
"learning_rate": 8.710452543051663e-06, |
|
"loss": 0.5539, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 41.02, |
|
"learning_rate": 8.510212254705648e-06, |
|
"loss": 0.5619, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 41.02, |
|
"eval_accuracy": 0.7511520737327189, |
|
"eval_loss": 0.8228119611740112, |
|
"eval_runtime": 167.3356, |
|
"eval_samples_per_second": 1.297, |
|
"eval_steps_per_second": 0.167, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 8.309971966359633e-06, |
|
"loss": 0.3721, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 8.109731678013617e-06, |
|
"loss": 0.332, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 7.909491389667602e-06, |
|
"loss": 0.5869, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 7.709251101321587e-06, |
|
"loss": 0.4474, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 42.02, |
|
"learning_rate": 7.509010812975571e-06, |
|
"loss": 0.6052, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 42.02, |
|
"eval_accuracy": 0.7373271889400922, |
|
"eval_loss": 0.8320425152778625, |
|
"eval_runtime": 168.2726, |
|
"eval_samples_per_second": 1.29, |
|
"eval_steps_per_second": 0.166, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 7.308770524629556e-06, |
|
"loss": 0.451, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 7.108530236283541e-06, |
|
"loss": 0.282, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 6.908289947937525e-06, |
|
"loss": 0.4565, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 6.70804965959151e-06, |
|
"loss": 0.6579, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 43.02, |
|
"learning_rate": 6.507809371245495e-06, |
|
"loss": 0.3812, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 43.02, |
|
"learning_rate": 6.307569082899479e-06, |
|
"loss": 0.5124, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 43.02, |
|
"eval_accuracy": 0.7419354838709677, |
|
"eval_loss": 0.8776273131370544, |
|
"eval_runtime": 167.8781, |
|
"eval_samples_per_second": 1.293, |
|
"eval_steps_per_second": 0.167, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 6.107328794553464e-06, |
|
"loss": 0.2933, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 5.907088506207449e-06, |
|
"loss": 0.3212, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 5.706848217861433e-06, |
|
"loss": 0.47, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 5.506607929515419e-06, |
|
"loss": 0.3473, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"learning_rate": 5.306367641169404e-06, |
|
"loss": 0.5966, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"learning_rate": 5.106127352823388e-06, |
|
"loss": 0.3323, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"eval_accuracy": 0.7465437788018433, |
|
"eval_loss": 0.8515135645866394, |
|
"eval_runtime": 169.0764, |
|
"eval_samples_per_second": 1.283, |
|
"eval_steps_per_second": 0.166, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 4.905887064477374e-06, |
|
"loss": 0.5174, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 4.7056467761313585e-06, |
|
"loss": 0.405, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 4.505406487785343e-06, |
|
"loss": 0.5241, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 4.305166199439328e-06, |
|
"loss": 0.6818, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 45.02, |
|
"learning_rate": 4.1049259110933125e-06, |
|
"loss": 0.5684, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 45.02, |
|
"eval_accuracy": 0.7096774193548387, |
|
"eval_loss": 0.9309377074241638, |
|
"eval_runtime": 169.732, |
|
"eval_samples_per_second": 1.278, |
|
"eval_steps_per_second": 0.165, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 3.904685622747297e-06, |
|
"loss": 0.4354, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 3.704445334401282e-06, |
|
"loss": 0.3822, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 3.5042050460552665e-06, |
|
"loss": 0.5717, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 3.303964757709251e-06, |
|
"loss": 0.2871, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 46.02, |
|
"learning_rate": 3.103724469363236e-06, |
|
"loss": 0.5375, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 46.02, |
|
"learning_rate": 2.9034841810172205e-06, |
|
"loss": 0.4406, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 46.02, |
|
"eval_accuracy": 0.7465437788018433, |
|
"eval_loss": 0.8826016783714294, |
|
"eval_runtime": 169.4846, |
|
"eval_samples_per_second": 1.28, |
|
"eval_steps_per_second": 0.165, |
|
"step": 2632 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 2.7032438926712056e-06, |
|
"loss": 0.5365, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 2.5030036043251903e-06, |
|
"loss": 0.3124, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 2.3027633159791754e-06, |
|
"loss": 0.3491, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 2.10252302763316e-06, |
|
"loss": 0.4527, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 47.02, |
|
"learning_rate": 1.9022827392871447e-06, |
|
"loss": 0.6164, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 47.02, |
|
"eval_accuracy": 0.695852534562212, |
|
"eval_loss": 0.8993560075759888, |
|
"eval_runtime": 168.0031, |
|
"eval_samples_per_second": 1.292, |
|
"eval_steps_per_second": 0.167, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 1.7020424509411294e-06, |
|
"loss": 0.2574, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 1.501802162595114e-06, |
|
"loss": 0.2911, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 1.301561874249099e-06, |
|
"loss": 0.4747, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 1.1013215859030839e-06, |
|
"loss": 0.4525, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"learning_rate": 9.010812975570685e-07, |
|
"loss": 0.351, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"learning_rate": 7.008410092110533e-07, |
|
"loss": 0.4549, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"eval_accuracy": 0.7188940092165899, |
|
"eval_loss": 0.8699988722801208, |
|
"eval_runtime": 168.7472, |
|
"eval_samples_per_second": 1.286, |
|
"eval_steps_per_second": 0.166, |
|
"step": 2744 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 5.006007208650381e-07, |
|
"loss": 0.4566, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 3.003604325190229e-07, |
|
"loss": 0.4441, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 1.0012014417300761e-07, |
|
"loss": 0.3453, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"eval_accuracy": 0.7188940092165899, |
|
"eval_loss": 0.8821613788604736, |
|
"eval_runtime": 168.772, |
|
"eval_samples_per_second": 1.286, |
|
"eval_steps_per_second": 0.166, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"step": 2775, |
|
"total_flos": 2.7602315185605673e+19, |
|
"train_loss": 0.7408499845298561, |
|
"train_runtime": 29540.8546, |
|
"train_samples_per_second": 0.752, |
|
"train_steps_per_second": 0.094 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"eval_accuracy": 0.7824074074074074, |
|
"eval_loss": 0.6378526091575623, |
|
"eval_runtime": 176.2386, |
|
"eval_samples_per_second": 1.226, |
|
"eval_steps_per_second": 0.153, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"eval_accuracy": 0.7824074074074074, |
|
"eval_loss": 0.6378525495529175, |
|
"eval_runtime": 181.9628, |
|
"eval_samples_per_second": 1.187, |
|
"eval_steps_per_second": 0.148, |
|
"step": 2775 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2775, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 2.7602315185605673e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|