|
{ |
|
"best_metric": 0.8525345622119815, |
|
"best_model_checkpoint": "videomae-base-finetuned-subset\\checkpoint-5900", |
|
"epoch": 59.00826210826211, |
|
"eval_steps": 500, |
|
"global_step": 7020, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.122507122507123e-07, |
|
"loss": 1.1155, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.4245014245014246e-06, |
|
"loss": 1.1405, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.136752136752137e-06, |
|
"loss": 0.5289, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.8490028490028492e-06, |
|
"loss": 0.8832, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.5612535612535615e-06, |
|
"loss": 1.0172, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.273504273504274e-06, |
|
"loss": 0.7363, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.985754985754986e-06, |
|
"loss": 0.8444, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.6980056980056985e-06, |
|
"loss": 0.8984, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.41025641025641e-06, |
|
"loss": 1.085, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.122507122507123e-06, |
|
"loss": 0.7478, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.834757834757835e-06, |
|
"loss": 0.9225, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.7419354838709677, |
|
"eval_loss": 0.7362823486328125, |
|
"eval_runtime": 289.676, |
|
"eval_samples_per_second": 0.749, |
|
"eval_steps_per_second": 0.19, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.547008547008548e-06, |
|
"loss": 0.8071, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.4488, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.971509971509972e-06, |
|
"loss": 1.23, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0683760683760684e-05, |
|
"loss": 0.7655, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.1396011396011397e-05, |
|
"loss": 0.8764, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.2108262108262108e-05, |
|
"loss": 0.5023, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.282051282051282e-05, |
|
"loss": 0.5699, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.3532763532763535e-05, |
|
"loss": 0.8942, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.4245014245014246e-05, |
|
"loss": 0.7764, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.4957264957264958e-05, |
|
"loss": 0.8046, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.566951566951567e-05, |
|
"loss": 0.5461, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.6381766381766382e-05, |
|
"loss": 0.8357, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.7004608294930875, |
|
"eval_loss": 0.911893367767334, |
|
"eval_runtime": 306.0217, |
|
"eval_samples_per_second": 0.709, |
|
"eval_steps_per_second": 0.18, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.7094017094017095e-05, |
|
"loss": 0.575, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.7806267806267805e-05, |
|
"loss": 0.7452, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.7453, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.5861, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9943019943019945e-05, |
|
"loss": 0.4319, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.0655270655270654e-05, |
|
"loss": 0.532, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.1367521367521368e-05, |
|
"loss": 0.8728, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.207977207977208e-05, |
|
"loss": 1.1116, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.2792022792022794e-05, |
|
"loss": 0.9775, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.3504273504273504e-05, |
|
"loss": 1.1248, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.4216524216524217e-05, |
|
"loss": 0.7516, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.492877492877493e-05, |
|
"loss": 0.474, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.6820276497695853, |
|
"eval_loss": 0.9698442220687866, |
|
"eval_runtime": 313.4651, |
|
"eval_samples_per_second": 0.692, |
|
"eval_steps_per_second": 0.175, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.564102564102564e-05, |
|
"loss": 0.999, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.6353276353276356e-05, |
|
"loss": 0.758, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.706552706552707e-05, |
|
"loss": 0.6963, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.5529, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.8490028490028492e-05, |
|
"loss": 0.7653, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.9202279202279202e-05, |
|
"loss": 0.8444, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.9914529914529915e-05, |
|
"loss": 0.6609, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.0626780626780625e-05, |
|
"loss": 0.5681, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.133903133903134e-05, |
|
"loss": 0.7641, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.205128205128206e-05, |
|
"loss": 0.7485, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.2763532763532764e-05, |
|
"loss": 0.7458, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.347578347578348e-05, |
|
"loss": 0.7899, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_accuracy": 0.6774193548387096, |
|
"eval_loss": 1.135077714920044, |
|
"eval_runtime": 285.9321, |
|
"eval_samples_per_second": 0.759, |
|
"eval_steps_per_second": 0.192, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.418803418803419e-05, |
|
"loss": 0.4874, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.4900284900284904e-05, |
|
"loss": 0.6289, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.561253561253561e-05, |
|
"loss": 0.6611, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.6324786324786323e-05, |
|
"loss": 0.7506, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.777, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.774928774928775e-05, |
|
"loss": 0.9948, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.704, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.9173789173789176e-05, |
|
"loss": 0.8743, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.988603988603989e-05, |
|
"loss": 0.6572, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.05982905982906e-05, |
|
"loss": 1.0614, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.131054131054131e-05, |
|
"loss": 0.6374, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.202279202279202e-05, |
|
"loss": 0.9015, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_accuracy": 0.4976958525345622, |
|
"eval_loss": 1.3822749853134155, |
|
"eval_runtime": 285.2486, |
|
"eval_samples_per_second": 0.761, |
|
"eval_steps_per_second": 0.193, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.2735042735042735e-05, |
|
"loss": 1.1476, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.344729344729345e-05, |
|
"loss": 0.7482, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.415954415954416e-05, |
|
"loss": 0.835, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.4871794871794874e-05, |
|
"loss": 0.5413, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.558404558404559e-05, |
|
"loss": 0.9714, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.4952, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.700854700854701e-05, |
|
"loss": 1.0349, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.772079772079772e-05, |
|
"loss": 0.8882, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.8433048433048433e-05, |
|
"loss": 0.9244, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.9145299145299147e-05, |
|
"loss": 1.1217, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.985754985754986e-05, |
|
"loss": 0.7402, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_accuracy": 0.695852534562212, |
|
"eval_loss": 0.8660895824432373, |
|
"eval_runtime": 289.3403, |
|
"eval_samples_per_second": 0.75, |
|
"eval_steps_per_second": 0.19, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.993668882557772e-05, |
|
"loss": 0.6064, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.985754985754986e-05, |
|
"loss": 0.6741, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.9778410889522e-05, |
|
"loss": 0.7149, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.9699271921494144e-05, |
|
"loss": 0.9701, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.962013295346629e-05, |
|
"loss": 0.6009, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.9540993985438435e-05, |
|
"loss": 1.3047, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.946185501741058e-05, |
|
"loss": 0.7303, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.938271604938271e-05, |
|
"loss": 0.7054, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.930357708135486e-05, |
|
"loss": 0.884, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.9224438113327004e-05, |
|
"loss": 0.7226, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.9145299145299147e-05, |
|
"loss": 0.8143, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.906616017727129e-05, |
|
"loss": 0.6343, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_accuracy": 0.7004608294930875, |
|
"eval_loss": 0.668888509273529, |
|
"eval_runtime": 284.8469, |
|
"eval_samples_per_second": 0.762, |
|
"eval_steps_per_second": 0.193, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.898702120924343e-05, |
|
"loss": 0.8134, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.890788224121557e-05, |
|
"loss": 0.664, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.882874327318772e-05, |
|
"loss": 0.7448, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.8749604305159865e-05, |
|
"loss": 0.6677, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.867046533713201e-05, |
|
"loss": 1.0314, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.859132636910415e-05, |
|
"loss": 0.6049, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.851218740107629e-05, |
|
"loss": 1.3879, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.8433048433048433e-05, |
|
"loss": 0.7445, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.835390946502058e-05, |
|
"loss": 0.7087, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.8274770496992725e-05, |
|
"loss": 0.5025, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.819563152896486e-05, |
|
"loss": 0.6759, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 4.811649256093701e-05, |
|
"loss": 0.7427, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_accuracy": 0.6728110599078341, |
|
"eval_loss": 0.9108946919441223, |
|
"eval_runtime": 320.8735, |
|
"eval_samples_per_second": 0.676, |
|
"eval_steps_per_second": 0.171, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.803735359290915e-05, |
|
"loss": 0.9541, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.7958214624881294e-05, |
|
"loss": 0.9822, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.787907565685344e-05, |
|
"loss": 0.9233, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 4.779993668882558e-05, |
|
"loss": 0.9739, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 4.772079772079772e-05, |
|
"loss": 0.966, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 4.764165875276987e-05, |
|
"loss": 0.8516, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 4.756251978474201e-05, |
|
"loss": 0.6187, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 4.7483380816714154e-05, |
|
"loss": 1.2194, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 4.7404241848686296e-05, |
|
"loss": 1.1313, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 4.732510288065844e-05, |
|
"loss": 0.7851, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 4.724596391263058e-05, |
|
"loss": 0.8549, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 4.716682494460272e-05, |
|
"loss": 0.5898, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_accuracy": 0.5944700460829493, |
|
"eval_loss": 1.0126854181289673, |
|
"eval_runtime": 324.9051, |
|
"eval_samples_per_second": 0.668, |
|
"eval_steps_per_second": 0.169, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.708768597657487e-05, |
|
"loss": 0.4566, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.700854700854701e-05, |
|
"loss": 0.9513, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.692940804051915e-05, |
|
"loss": 0.3735, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.68502690724913e-05, |
|
"loss": 0.7034, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.677113010446344e-05, |
|
"loss": 0.6437, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.669199113643558e-05, |
|
"loss": 0.8746, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.6612852168407725e-05, |
|
"loss": 0.6746, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.653371320037987e-05, |
|
"loss": 0.9281, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.645457423235201e-05, |
|
"loss": 0.9406, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.637543526432416e-05, |
|
"loss": 1.0328, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.8338, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.621715732826844e-05, |
|
"loss": 0.6258, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"eval_accuracy": 0.7235023041474654, |
|
"eval_loss": 0.7130948305130005, |
|
"eval_runtime": 315.7938, |
|
"eval_samples_per_second": 0.687, |
|
"eval_steps_per_second": 0.174, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.6138018360240585e-05, |
|
"loss": 0.635, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.605887939221273e-05, |
|
"loss": 0.8311, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.597974042418487e-05, |
|
"loss": 1.2343, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.590060145615702e-05, |
|
"loss": 0.8252, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.582146248812916e-05, |
|
"loss": 0.7481, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.5742323520101296e-05, |
|
"loss": 0.4203, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.5663184552073445e-05, |
|
"loss": 0.5636, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.558404558404559e-05, |
|
"loss": 0.5062, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.550490661601773e-05, |
|
"loss": 0.5214, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.542576764798987e-05, |
|
"loss": 0.6217, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 4.5346628679962014e-05, |
|
"loss": 0.9957, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"eval_accuracy": 0.6728110599078341, |
|
"eval_loss": 0.9507045745849609, |
|
"eval_runtime": 315.2197, |
|
"eval_samples_per_second": 0.688, |
|
"eval_steps_per_second": 0.174, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.5267489711934157e-05, |
|
"loss": 0.5816, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.51883507439063e-05, |
|
"loss": 0.8868, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.510921177587845e-05, |
|
"loss": 0.7216, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.503007280785059e-05, |
|
"loss": 0.5962, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.4950933839822725e-05, |
|
"loss": 1.2384, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.4871794871794874e-05, |
|
"loss": 0.8794, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.479265590376702e-05, |
|
"loss": 0.8364, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.471351693573916e-05, |
|
"loss": 0.8106, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.463437796771131e-05, |
|
"loss": 0.7839, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.455523899968344e-05, |
|
"loss": 0.7032, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.4476100031655586e-05, |
|
"loss": 0.9424, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 4.4396961063627735e-05, |
|
"loss": 0.401, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"eval_accuracy": 0.7188940092165899, |
|
"eval_loss": 0.6258705258369446, |
|
"eval_runtime": 308.0396, |
|
"eval_samples_per_second": 0.704, |
|
"eval_steps_per_second": 0.179, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.431782209559988e-05, |
|
"loss": 0.4381, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.423868312757202e-05, |
|
"loss": 0.5723, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.415954415954416e-05, |
|
"loss": 0.5368, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.4080405191516304e-05, |
|
"loss": 1.065, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.4001266223488446e-05, |
|
"loss": 0.5411, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.3922127255460595e-05, |
|
"loss": 1.1746, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.384298828743274e-05, |
|
"loss": 0.8506, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.376384931940488e-05, |
|
"loss": 0.9014, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.368471035137702e-05, |
|
"loss": 0.7609, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.3605571383349164e-05, |
|
"loss": 0.6263, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.3526432415321306e-05, |
|
"loss": 1.1741, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 4.344729344729345e-05, |
|
"loss": 0.5422, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"eval_accuracy": 0.6774193548387096, |
|
"eval_loss": 0.9453245997428894, |
|
"eval_runtime": 323.3925, |
|
"eval_samples_per_second": 0.671, |
|
"eval_steps_per_second": 0.17, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 4.336815447926559e-05, |
|
"loss": 0.607, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 4.328901551123773e-05, |
|
"loss": 0.7626, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 4.3209876543209875e-05, |
|
"loss": 0.4373, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 4.3130737575182024e-05, |
|
"loss": 0.5643, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 4.3051598607154166e-05, |
|
"loss": 0.7982, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 4.297245963912631e-05, |
|
"loss": 0.971, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 4.289332067109845e-05, |
|
"loss": 0.9764, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 4.281418170307059e-05, |
|
"loss": 0.8366, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 4.2735042735042735e-05, |
|
"loss": 1.0652, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 4.2655903767014884e-05, |
|
"loss": 0.7619, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 4.2576764798987026e-05, |
|
"loss": 0.639, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 4.249762583095916e-05, |
|
"loss": 0.6852, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"eval_accuracy": 0.7004608294930875, |
|
"eval_loss": 0.8649422526359558, |
|
"eval_runtime": 301.7734, |
|
"eval_samples_per_second": 0.719, |
|
"eval_steps_per_second": 0.182, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 4.241848686293131e-05, |
|
"loss": 0.9047, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 4.233934789490345e-05, |
|
"loss": 1.1309, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 4.2260208926875595e-05, |
|
"loss": 0.7564, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 4.2181069958847744e-05, |
|
"loss": 0.6608, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 4.210193099081988e-05, |
|
"loss": 0.5688, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 4.202279202279202e-05, |
|
"loss": 0.6834, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 4.194365305476417e-05, |
|
"loss": 0.7658, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 4.186451408673631e-05, |
|
"loss": 0.4818, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 4.1785375118708455e-05, |
|
"loss": 0.6077, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 4.17062361506806e-05, |
|
"loss": 0.6641, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 4.162709718265274e-05, |
|
"loss": 0.4068, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 4.154795821462488e-05, |
|
"loss": 0.8469, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"eval_accuracy": 0.6912442396313364, |
|
"eval_loss": 0.9379397630691528, |
|
"eval_runtime": 304.2958, |
|
"eval_samples_per_second": 0.713, |
|
"eval_steps_per_second": 0.181, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.1468819246597024e-05, |
|
"loss": 0.4658, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.138968027856917e-05, |
|
"loss": 0.4626, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.131054131054131e-05, |
|
"loss": 0.8248, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 4.123140234251345e-05, |
|
"loss": 0.883, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 4.11522633744856e-05, |
|
"loss": 0.4465, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 4.107312440645774e-05, |
|
"loss": 0.696, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 4.0993985438429884e-05, |
|
"loss": 0.852, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 4.091484647040203e-05, |
|
"loss": 0.7966, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 4.083570750237417e-05, |
|
"loss": 0.6836, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 4.075656853434631e-05, |
|
"loss": 0.8427, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 4.067742956631846e-05, |
|
"loss": 0.8492, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"eval_accuracy": 0.6451612903225806, |
|
"eval_loss": 0.900291919708252, |
|
"eval_runtime": 312.5208, |
|
"eval_samples_per_second": 0.694, |
|
"eval_steps_per_second": 0.176, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.05982905982906e-05, |
|
"loss": 0.9333, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.0519151630262745e-05, |
|
"loss": 0.788, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.044001266223489e-05, |
|
"loss": 0.6258, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.036087369420703e-05, |
|
"loss": 0.4548, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 4.028173472617917e-05, |
|
"loss": 0.7268, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 4.020259575815132e-05, |
|
"loss": 0.886, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 4.012345679012346e-05, |
|
"loss": 0.6221, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 4.00443178220956e-05, |
|
"loss": 0.5802, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 3.996517885406775e-05, |
|
"loss": 0.6743, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 3.988603988603989e-05, |
|
"loss": 0.6022, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 3.980690091801203e-05, |
|
"loss": 0.6691, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 3.9727761949984174e-05, |
|
"loss": 0.7633, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"eval_accuracy": 0.7235023041474654, |
|
"eval_loss": 0.7601491212844849, |
|
"eval_runtime": 299.492, |
|
"eval_samples_per_second": 0.725, |
|
"eval_steps_per_second": 0.184, |
|
"step": 2006 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.9648622981956316e-05, |
|
"loss": 0.7329, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.956948401392846e-05, |
|
"loss": 0.4407, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.94903450459006e-05, |
|
"loss": 0.3618, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.941120607787275e-05, |
|
"loss": 0.545, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 3.933206710984489e-05, |
|
"loss": 0.6322, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 3.925292814181703e-05, |
|
"loss": 0.7812, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 3.9173789173789176e-05, |
|
"loss": 0.5198, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 3.909465020576132e-05, |
|
"loss": 0.563, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 3.901551123773346e-05, |
|
"loss": 0.7545, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 3.893637226970561e-05, |
|
"loss": 0.9906, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 3.8857233301677745e-05, |
|
"loss": 0.807, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"learning_rate": 3.877809433364989e-05, |
|
"loss": 0.6063, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"eval_accuracy": 0.7788018433179723, |
|
"eval_loss": 0.6181166768074036, |
|
"eval_runtime": 296.2425, |
|
"eval_samples_per_second": 0.733, |
|
"eval_steps_per_second": 0.186, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.8698955365622036e-05, |
|
"loss": 0.8029, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.861981639759418e-05, |
|
"loss": 0.7383, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.854067742956632e-05, |
|
"loss": 0.9192, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.8748, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 3.8382399493510605e-05, |
|
"loss": 0.5987, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 3.830326052548275e-05, |
|
"loss": 0.7523, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 3.8224121557454896e-05, |
|
"loss": 0.853, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 3.814498258942704e-05, |
|
"loss": 0.7287, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 3.806584362139918e-05, |
|
"loss": 0.8661, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 3.798670465337132e-05, |
|
"loss": 0.6155, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 3.7907565685343465e-05, |
|
"loss": 0.792, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 3.782842671731561e-05, |
|
"loss": 0.6436, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"eval_accuracy": 0.631336405529954, |
|
"eval_loss": 0.9444882273674011, |
|
"eval_runtime": 299.4212, |
|
"eval_samples_per_second": 0.725, |
|
"eval_steps_per_second": 0.184, |
|
"step": 2242 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 3.774928774928775e-05, |
|
"loss": 0.6049, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 3.767014878125989e-05, |
|
"loss": 0.6267, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 3.7591009813232034e-05, |
|
"loss": 0.4215, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 3.7511870845204176e-05, |
|
"loss": 0.9263, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 3.7432731877176325e-05, |
|
"loss": 0.4688, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 3.735359290914847e-05, |
|
"loss": 0.668, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 3.727445394112061e-05, |
|
"loss": 0.5778, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 3.719531497309275e-05, |
|
"loss": 0.4659, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 3.7116176005064894e-05, |
|
"loss": 0.5316, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.6448, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 3.6957898069009186e-05, |
|
"loss": 0.5125, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 3.687875910098133e-05, |
|
"loss": 0.8931, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"eval_accuracy": 0.728110599078341, |
|
"eval_loss": 0.8515065908432007, |
|
"eval_runtime": 305.633, |
|
"eval_samples_per_second": 0.71, |
|
"eval_steps_per_second": 0.18, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 3.679962013295346e-05, |
|
"loss": 0.6621, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 3.672048116492561e-05, |
|
"loss": 0.5259, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 3.6641342196897754e-05, |
|
"loss": 0.3653, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 3.65622032288699e-05, |
|
"loss": 0.4086, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 3.6483064260842046e-05, |
|
"loss": 0.4137, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 3.640392529281418e-05, |
|
"loss": 1.0001, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 3.6324786324786323e-05, |
|
"loss": 0.7551, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 3.624564735675847e-05, |
|
"loss": 0.7024, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 3.6166508388730615e-05, |
|
"loss": 0.6067, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 3.608736942070276e-05, |
|
"loss": 1.0071, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"learning_rate": 3.60082304526749e-05, |
|
"loss": 0.8599, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"eval_accuracy": 0.6359447004608295, |
|
"eval_loss": 1.0786014795303345, |
|
"eval_runtime": 314.2163, |
|
"eval_samples_per_second": 0.691, |
|
"eval_steps_per_second": 0.175, |
|
"step": 2478 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 3.592909148464704e-05, |
|
"loss": 0.803, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 3.5849952516619184e-05, |
|
"loss": 0.4828, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 3.5770813548591326e-05, |
|
"loss": 0.3992, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 3.5691674580563475e-05, |
|
"loss": 0.62, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 3.561253561253561e-05, |
|
"loss": 0.7068, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 3.553339664450775e-05, |
|
"loss": 0.3764, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 3.54542576764799e-05, |
|
"loss": 0.7479, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 3.5375118708452044e-05, |
|
"loss": 0.7272, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 3.5295979740424186e-05, |
|
"loss": 0.653, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 3.521684077239633e-05, |
|
"loss": 0.5176, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 3.513770180436847e-05, |
|
"loss": 0.7597, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"learning_rate": 3.505856283634061e-05, |
|
"loss": 0.5183, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"eval_accuracy": 0.6866359447004609, |
|
"eval_loss": 0.948082447052002, |
|
"eval_runtime": 300.3013, |
|
"eval_samples_per_second": 0.723, |
|
"eval_steps_per_second": 0.183, |
|
"step": 2596 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 3.497942386831276e-05, |
|
"loss": 0.7119, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 3.4900284900284904e-05, |
|
"loss": 0.6381, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 3.4821145932257046e-05, |
|
"loss": 0.8518, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 3.474200696422919e-05, |
|
"loss": 0.9462, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 3.466286799620133e-05, |
|
"loss": 0.6183, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 3.458372902817347e-05, |
|
"loss": 0.7339, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 3.450459006014562e-05, |
|
"loss": 0.8466, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 3.4425451092117764e-05, |
|
"loss": 0.6976, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 3.43463121240899e-05, |
|
"loss": 0.9784, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 3.426717315606205e-05, |
|
"loss": 0.4932, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 3.418803418803419e-05, |
|
"loss": 0.5164, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"learning_rate": 3.410889522000633e-05, |
|
"loss": 0.7982, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"eval_accuracy": 0.7235023041474654, |
|
"eval_loss": 0.8364368677139282, |
|
"eval_runtime": 306.1906, |
|
"eval_samples_per_second": 0.709, |
|
"eval_steps_per_second": 0.18, |
|
"step": 2714 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 3.4029756251978475e-05, |
|
"loss": 0.6178, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 3.395061728395062e-05, |
|
"loss": 0.5603, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 3.387147831592276e-05, |
|
"loss": 0.6577, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 3.37923393478949e-05, |
|
"loss": 0.7942, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 3.371320037986705e-05, |
|
"loss": 0.4468, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 3.363406141183919e-05, |
|
"loss": 0.6931, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 3.355492244381133e-05, |
|
"loss": 0.5835, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 3.347578347578348e-05, |
|
"loss": 0.7099, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 3.339664450775562e-05, |
|
"loss": 0.6785, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 3.331750553972776e-05, |
|
"loss": 0.3025, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"learning_rate": 3.323836657169991e-05, |
|
"loss": 0.9246, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"learning_rate": 3.3159227603672046e-05, |
|
"loss": 1.0003, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"eval_accuracy": 0.7327188940092166, |
|
"eval_loss": 0.7810962796211243, |
|
"eval_runtime": 298.2745, |
|
"eval_samples_per_second": 0.728, |
|
"eval_steps_per_second": 0.184, |
|
"step": 2832 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 3.308008863564419e-05, |
|
"loss": 1.1583, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 3.300094966761634e-05, |
|
"loss": 0.4923, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 3.292181069958848e-05, |
|
"loss": 0.4868, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 3.284267173156062e-05, |
|
"loss": 0.7352, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 3.2763532763532764e-05, |
|
"loss": 0.8188, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 3.268439379550491e-05, |
|
"loss": 0.8439, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 3.260525482747705e-05, |
|
"loss": 0.821, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 3.25261158594492e-05, |
|
"loss": 0.6092, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 3.244697689142134e-05, |
|
"loss": 0.7416, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 3.236783792339348e-05, |
|
"loss": 0.4177, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 3.2288698955365625e-05, |
|
"loss": 0.5732, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 3.220955998733777e-05, |
|
"loss": 0.6666, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"eval_accuracy": 0.7465437788018433, |
|
"eval_loss": 0.7551702857017517, |
|
"eval_runtime": 297.83, |
|
"eval_samples_per_second": 0.729, |
|
"eval_steps_per_second": 0.185, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 3.213042101930991e-05, |
|
"loss": 0.5516, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 3.205128205128206e-05, |
|
"loss": 0.5643, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 3.1972143083254193e-05, |
|
"loss": 0.7, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 3.1893004115226336e-05, |
|
"loss": 0.4949, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 3.181386514719848e-05, |
|
"loss": 0.6588, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 3.173472617917063e-05, |
|
"loss": 0.6374, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 3.165558721114277e-05, |
|
"loss": 0.4516, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 3.157644824311491e-05, |
|
"loss": 0.6673, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 3.1497309275087054e-05, |
|
"loss": 0.6344, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 3.1418170307059196e-05, |
|
"loss": 0.422, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"learning_rate": 3.133903133903134e-05, |
|
"loss": 0.8527, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"eval_accuracy": 0.7188940092165899, |
|
"eval_loss": 0.8201001286506653, |
|
"eval_runtime": 289.214, |
|
"eval_samples_per_second": 0.75, |
|
"eval_steps_per_second": 0.19, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 3.125989237100349e-05, |
|
"loss": 0.71, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 3.118075340297563e-05, |
|
"loss": 0.7812, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 3.1101614434947765e-05, |
|
"loss": 0.4559, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 3.1022475466919914e-05, |
|
"loss": 0.5269, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 3.0943336498892056e-05, |
|
"loss": 0.75, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 3.08641975308642e-05, |
|
"loss": 0.7118, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 3.078505856283635e-05, |
|
"loss": 0.7999, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 3.070591959480848e-05, |
|
"loss": 0.4535, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 3.0626780626780625e-05, |
|
"loss": 0.7403, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 3.0547641658752774e-05, |
|
"loss": 0.7067, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 3.0468502690724916e-05, |
|
"loss": 0.6169, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"learning_rate": 3.0389363722697055e-05, |
|
"loss": 0.4678, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"eval_accuracy": 0.695852534562212, |
|
"eval_loss": 1.0259956121444702, |
|
"eval_runtime": 293.3586, |
|
"eval_samples_per_second": 0.74, |
|
"eval_steps_per_second": 0.187, |
|
"step": 3186 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 3.0310224754669204e-05, |
|
"loss": 0.4734, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 3.0231085786641343e-05, |
|
"loss": 0.6945, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 3.0151946818613485e-05, |
|
"loss": 0.667, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 3.0072807850585634e-05, |
|
"loss": 0.6138, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 2.9993668882557773e-05, |
|
"loss": 0.7627, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 2.9914529914529915e-05, |
|
"loss": 0.8703, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 2.9835390946502057e-05, |
|
"loss": 0.6306, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 2.9756251978474203e-05, |
|
"loss": 0.7293, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 2.9677113010446345e-05, |
|
"loss": 0.5378, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 2.9597974042418487e-05, |
|
"loss": 0.9667, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 2.9518835074390633e-05, |
|
"loss": 0.4905, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"learning_rate": 2.9439696106362775e-05, |
|
"loss": 0.7354, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"eval_accuracy": 0.6866359447004609, |
|
"eval_loss": 0.8520135879516602, |
|
"eval_runtime": 253.7054, |
|
"eval_samples_per_second": 0.855, |
|
"eval_steps_per_second": 0.217, |
|
"step": 3304 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2.9360557138334914e-05, |
|
"loss": 0.4119, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2.9281418170307063e-05, |
|
"loss": 0.5532, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2.9202279202279202e-05, |
|
"loss": 0.5374, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.9123140234251344e-05, |
|
"loss": 0.4628, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.9044001266223493e-05, |
|
"loss": 0.6509, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.8964862298195632e-05, |
|
"loss": 0.3157, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.8885723330167774e-05, |
|
"loss": 0.5705, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.880658436213992e-05, |
|
"loss": 0.5273, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.8727445394112062e-05, |
|
"loss": 0.4847, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.8648306426084204e-05, |
|
"loss": 0.5194, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"learning_rate": 2.856916745805635e-05, |
|
"loss": 0.391, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"learning_rate": 2.8490028490028492e-05, |
|
"loss": 1.1097, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"eval_accuracy": 0.7327188940092166, |
|
"eval_loss": 0.9238936901092529, |
|
"eval_runtime": 306.5306, |
|
"eval_samples_per_second": 0.708, |
|
"eval_steps_per_second": 0.179, |
|
"step": 3422 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 2.8410889522000634e-05, |
|
"loss": 0.7316, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 2.833175055397278e-05, |
|
"loss": 0.5964, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 2.8252611585944922e-05, |
|
"loss": 0.4294, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 2.817347261791706e-05, |
|
"loss": 0.5873, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 2.8094333649889203e-05, |
|
"loss": 0.5362, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 2.8015194681861352e-05, |
|
"loss": 0.7654, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 2.793605571383349e-05, |
|
"loss": 0.6749, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 2.7856916745805633e-05, |
|
"loss": 0.392, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.9069, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 2.769863880974992e-05, |
|
"loss": 0.4994, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"learning_rate": 2.7619499841722064e-05, |
|
"loss": 0.7977, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"learning_rate": 2.754036087369421e-05, |
|
"loss": 0.6264, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"eval_accuracy": 0.7557603686635944, |
|
"eval_loss": 0.6894146800041199, |
|
"eval_runtime": 290.6314, |
|
"eval_samples_per_second": 0.747, |
|
"eval_steps_per_second": 0.189, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2.746122190566635e-05, |
|
"loss": 1.0999, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2.7382082937638494e-05, |
|
"loss": 0.5372, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2.730294396961064e-05, |
|
"loss": 0.595, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 2.722380500158278e-05, |
|
"loss": 0.9679, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 2.714466603355492e-05, |
|
"loss": 0.9635, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 2.706552706552707e-05, |
|
"loss": 0.3871, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 2.698638809749921e-05, |
|
"loss": 0.4446, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 2.690724912947135e-05, |
|
"loss": 0.587, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 2.68281101614435e-05, |
|
"loss": 0.5359, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 2.6748971193415638e-05, |
|
"loss": 0.7691, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 30.02, |
|
"learning_rate": 2.666983222538778e-05, |
|
"loss": 0.3348, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 30.02, |
|
"eval_accuracy": 0.8064516129032258, |
|
"eval_loss": 0.6229776740074158, |
|
"eval_runtime": 293.7254, |
|
"eval_samples_per_second": 0.739, |
|
"eval_steps_per_second": 0.187, |
|
"step": 3658 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 2.6590693257359926e-05, |
|
"loss": 0.9398, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 2.651155428933207e-05, |
|
"loss": 0.6431, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 2.643241532130421e-05, |
|
"loss": 0.3879, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 2.6353276353276356e-05, |
|
"loss": 0.4384, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 2.62741373852485e-05, |
|
"loss": 0.6234, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 2.619499841722064e-05, |
|
"loss": 0.7331, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 2.611585944919278e-05, |
|
"loss": 0.3598, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 2.603672048116493e-05, |
|
"loss": 0.6426, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 2.595758151313707e-05, |
|
"loss": 0.5889, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 2.587844254510921e-05, |
|
"loss": 0.6661, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 2.579930357708136e-05, |
|
"loss": 0.672, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 31.02, |
|
"learning_rate": 2.5720164609053497e-05, |
|
"loss": 0.5548, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 31.02, |
|
"eval_accuracy": 0.8202764976958525, |
|
"eval_loss": 0.6430536508560181, |
|
"eval_runtime": 296.7673, |
|
"eval_samples_per_second": 0.731, |
|
"eval_steps_per_second": 0.185, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 2.564102564102564e-05, |
|
"loss": 0.6612, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 2.5561886672997785e-05, |
|
"loss": 0.7013, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 2.5482747704969927e-05, |
|
"loss": 0.7166, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 2.540360873694207e-05, |
|
"loss": 0.8458, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 2.5324469768914215e-05, |
|
"loss": 0.7025, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 2.5245330800886358e-05, |
|
"loss": 0.4634, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 2.51661918328585e-05, |
|
"loss": 0.7509, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 2.5087052864830645e-05, |
|
"loss": 0.5975, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 2.5007913896802788e-05, |
|
"loss": 0.6689, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 2.492877492877493e-05, |
|
"loss": 0.4826, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 2.4849635960747072e-05, |
|
"loss": 0.4976, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 32.02, |
|
"learning_rate": 2.4770496992719218e-05, |
|
"loss": 0.4242, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 32.02, |
|
"eval_accuracy": 0.7050691244239631, |
|
"eval_loss": 0.8081349730491638, |
|
"eval_runtime": 298.374, |
|
"eval_samples_per_second": 0.727, |
|
"eval_steps_per_second": 0.184, |
|
"step": 3894 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 2.4691358024691357e-05, |
|
"loss": 0.3113, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 2.4612219056663502e-05, |
|
"loss": 0.6925, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 2.4533080088635644e-05, |
|
"loss": 0.6335, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 2.4453941120607787e-05, |
|
"loss": 0.5331, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 2.4374802152579932e-05, |
|
"loss": 0.6092, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 2.4295663184552074e-05, |
|
"loss": 0.534, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 2.4216524216524217e-05, |
|
"loss": 0.4707, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 2.4137385248496362e-05, |
|
"loss": 0.5926, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 2.4058246280468505e-05, |
|
"loss": 0.2674, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 2.3979107312440647e-05, |
|
"loss": 0.9024, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 2.389996834441279e-05, |
|
"loss": 0.3402, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 2.3820829376384935e-05, |
|
"loss": 0.5805, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"eval_accuracy": 0.8202764976958525, |
|
"eval_loss": 0.5598491430282593, |
|
"eval_runtime": 287.693, |
|
"eval_samples_per_second": 0.754, |
|
"eval_steps_per_second": 0.191, |
|
"step": 4012 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 2.3741690408357077e-05, |
|
"loss": 0.6835, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 2.366255144032922e-05, |
|
"loss": 0.6466, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 2.358341247230136e-05, |
|
"loss": 0.3698, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 2.3504273504273504e-05, |
|
"loss": 0.4723, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 2.342513453624565e-05, |
|
"loss": 0.7078, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 2.334599556821779e-05, |
|
"loss": 0.6876, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 2.3266856600189934e-05, |
|
"loss": 0.4859, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 2.318771763216208e-05, |
|
"loss": 0.5394, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 2.310857866413422e-05, |
|
"loss": 0.4636, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 2.3029439696106364e-05, |
|
"loss": 0.5758, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"learning_rate": 2.295030072807851e-05, |
|
"loss": 0.4957, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"learning_rate": 2.2871161760050648e-05, |
|
"loss": 0.7064, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"eval_accuracy": 0.7926267281105991, |
|
"eval_loss": 0.7340723276138306, |
|
"eval_runtime": 254.6894, |
|
"eval_samples_per_second": 0.852, |
|
"eval_steps_per_second": 0.216, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 2.2792022792022794e-05, |
|
"loss": 0.4694, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 2.2712883823994936e-05, |
|
"loss": 0.6956, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 2.2633744855967078e-05, |
|
"loss": 0.4924, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 2.2554605887939224e-05, |
|
"loss": 0.7678, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 2.2475466919911363e-05, |
|
"loss": 0.6094, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 2.239632795188351e-05, |
|
"loss": 0.3326, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 2.2317188983855654e-05, |
|
"loss": 0.623, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 2.2238050015827793e-05, |
|
"loss": 0.6507, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 2.215891104779994e-05, |
|
"loss": 0.5673, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 2.207977207977208e-05, |
|
"loss": 0.9691, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 35.02, |
|
"learning_rate": 2.2000633111744223e-05, |
|
"loss": 0.2534, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 35.02, |
|
"eval_accuracy": 0.783410138248848, |
|
"eval_loss": 0.6685347557067871, |
|
"eval_runtime": 253.8074, |
|
"eval_samples_per_second": 0.855, |
|
"eval_steps_per_second": 0.217, |
|
"step": 4248 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2.192149414371637e-05, |
|
"loss": 0.5305, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2.184235517568851e-05, |
|
"loss": 0.3181, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2.1763216207660653e-05, |
|
"loss": 0.2655, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2.1684077239632795e-05, |
|
"loss": 0.546, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 2.1604938271604937e-05, |
|
"loss": 0.5166, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 2.1525799303577083e-05, |
|
"loss": 0.612, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 2.1446660335549225e-05, |
|
"loss": 1.0337, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 2.1367521367521368e-05, |
|
"loss": 0.4693, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 2.1288382399493513e-05, |
|
"loss": 0.535, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 2.1209243431465655e-05, |
|
"loss": 0.2098, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 2.1130104463437798e-05, |
|
"loss": 0.5005, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 36.02, |
|
"learning_rate": 2.105096549540994e-05, |
|
"loss": 0.7578, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 36.02, |
|
"eval_accuracy": 0.7603686635944701, |
|
"eval_loss": 0.7591729760169983, |
|
"eval_runtime": 256.6972, |
|
"eval_samples_per_second": 0.845, |
|
"eval_steps_per_second": 0.214, |
|
"step": 4366 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2.0971826527382085e-05, |
|
"loss": 0.7535, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2.0892687559354228e-05, |
|
"loss": 0.5906, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2.081354859132637e-05, |
|
"loss": 0.5422, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2.0734409623298512e-05, |
|
"loss": 0.4581, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 2.0655270655270654e-05, |
|
"loss": 0.4551, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 2.05761316872428e-05, |
|
"loss": 0.528, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 2.0496992719214942e-05, |
|
"loss": 0.2991, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 2.0417853751187084e-05, |
|
"loss": 0.8417, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 2.033871478315923e-05, |
|
"loss": 0.924, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 2.0259575815131372e-05, |
|
"loss": 0.5173, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 2.0180436847103515e-05, |
|
"loss": 0.3303, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 37.02, |
|
"learning_rate": 2.010129787907566e-05, |
|
"loss": 0.5822, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 37.02, |
|
"eval_accuracy": 0.728110599078341, |
|
"eval_loss": 0.9471691250801086, |
|
"eval_runtime": 259.2305, |
|
"eval_samples_per_second": 0.837, |
|
"eval_steps_per_second": 0.212, |
|
"step": 4484 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 2.00221589110478e-05, |
|
"loss": 0.3202, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 1.9943019943019945e-05, |
|
"loss": 0.4554, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 1.9863880974992087e-05, |
|
"loss": 0.5358, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 1.978474200696423e-05, |
|
"loss": 0.3341, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 1.9705603038936375e-05, |
|
"loss": 1.1299, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 1.9626464070908514e-05, |
|
"loss": 0.4959, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 1.954732510288066e-05, |
|
"loss": 0.2519, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 1.9468186134852805e-05, |
|
"loss": 0.9426, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 1.9389047166824944e-05, |
|
"loss": 0.3135, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 1.930990819879709e-05, |
|
"loss": 0.528, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.8759, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"learning_rate": 1.9151630262741374e-05, |
|
"loss": 0.2939, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"eval_accuracy": 0.728110599078341, |
|
"eval_loss": 0.8887839317321777, |
|
"eval_runtime": 260.522, |
|
"eval_samples_per_second": 0.833, |
|
"eval_steps_per_second": 0.211, |
|
"step": 4602 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 1.907249129471352e-05, |
|
"loss": 0.7159, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 1.899335232668566e-05, |
|
"loss": 0.7495, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 1.8914213358657804e-05, |
|
"loss": 0.6435, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 1.8835074390629946e-05, |
|
"loss": 0.5865, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 1.8755935422602088e-05, |
|
"loss": 0.5622, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 1.8676796454574234e-05, |
|
"loss": 0.4081, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 1.8597657486546376e-05, |
|
"loss": 0.5133, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.5643, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 1.8439379550490664e-05, |
|
"loss": 0.5888, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 1.8360240582462806e-05, |
|
"loss": 0.7294, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"learning_rate": 1.828110161443495e-05, |
|
"loss": 0.4845, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"learning_rate": 1.820196264640709e-05, |
|
"loss": 0.4795, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"eval_accuracy": 0.663594470046083, |
|
"eval_loss": 1.0767979621887207, |
|
"eval_runtime": 260.495, |
|
"eval_samples_per_second": 0.833, |
|
"eval_steps_per_second": 0.211, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 1.8122823678379236e-05, |
|
"loss": 0.3626, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 1.804368471035138e-05, |
|
"loss": 0.5294, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 1.796454574232352e-05, |
|
"loss": 0.8263, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 1.7885406774295663e-05, |
|
"loss": 0.6514, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 1.7806267806267805e-05, |
|
"loss": 0.5644, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 1.772712883823995e-05, |
|
"loss": 0.5152, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 1.7647989870212093e-05, |
|
"loss": 0.5133, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 1.7568850902184235e-05, |
|
"loss": 0.6261, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 1.748971193415638e-05, |
|
"loss": 0.8368, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 1.7410572966128523e-05, |
|
"loss": 0.5701, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 40.02, |
|
"learning_rate": 1.7331433998100665e-05, |
|
"loss": 0.4038, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 40.02, |
|
"eval_accuracy": 0.8064516129032258, |
|
"eval_loss": 0.6451985836029053, |
|
"eval_runtime": 261.7369, |
|
"eval_samples_per_second": 0.829, |
|
"eval_steps_per_second": 0.21, |
|
"step": 4838 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 1.725229503007281e-05, |
|
"loss": 0.4527, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 1.717315606204495e-05, |
|
"loss": 0.9572, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 1.7094017094017095e-05, |
|
"loss": 0.5401, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 1.7014878125989238e-05, |
|
"loss": 0.3303, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 1.693573915796138e-05, |
|
"loss": 0.6231, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 1.6856600189933525e-05, |
|
"loss": 0.4519, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 1.6777461221905664e-05, |
|
"loss": 0.6937, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 1.669832225387781e-05, |
|
"loss": 0.4248, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 1.6619183285849956e-05, |
|
"loss": 0.6655, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 1.6540044317822094e-05, |
|
"loss": 0.5651, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 1.646090534979424e-05, |
|
"loss": 0.6075, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 41.02, |
|
"learning_rate": 1.6381766381766382e-05, |
|
"loss": 0.8347, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 41.02, |
|
"eval_accuracy": 0.7926267281105991, |
|
"eval_loss": 0.7040404677391052, |
|
"eval_runtime": 257.2136, |
|
"eval_samples_per_second": 0.844, |
|
"eval_steps_per_second": 0.214, |
|
"step": 4956 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 1.6302627413738524e-05, |
|
"loss": 0.7086, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 1.622348844571067e-05, |
|
"loss": 0.6848, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 1.6144349477682812e-05, |
|
"loss": 0.718, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 1.6065210509654955e-05, |
|
"loss": 0.3821, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 1.5986071541627097e-05, |
|
"loss": 0.488, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 1.590693257359924e-05, |
|
"loss": 0.3441, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 1.5827793605571385e-05, |
|
"loss": 0.5323, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 1.5748654637543527e-05, |
|
"loss": 0.3723, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 1.566951566951567e-05, |
|
"loss": 0.5536, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 1.5590376701487815e-05, |
|
"loss": 0.2562, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 1.5511237733459957e-05, |
|
"loss": 0.4913, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 42.02, |
|
"learning_rate": 1.54320987654321e-05, |
|
"loss": 0.4113, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 42.02, |
|
"eval_accuracy": 0.7373271889400922, |
|
"eval_loss": 0.8011646866798401, |
|
"eval_runtime": 252.0655, |
|
"eval_samples_per_second": 0.861, |
|
"eval_steps_per_second": 0.218, |
|
"step": 5074 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 1.535295979740424e-05, |
|
"loss": 0.1942, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 1.5273820829376387e-05, |
|
"loss": 0.6847, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 1.5194681861348528e-05, |
|
"loss": 0.5127, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 1.5115542893320671e-05, |
|
"loss": 0.18, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 1.5036403925292817e-05, |
|
"loss": 0.6956, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 1.4957264957264958e-05, |
|
"loss": 0.7358, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 1.4878125989237102e-05, |
|
"loss": 0.7034, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 1.4798987021209244e-05, |
|
"loss": 0.5671, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 1.4719848053181388e-05, |
|
"loss": 0.276, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 1.4640709085153532e-05, |
|
"loss": 0.5131, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 43.02, |
|
"learning_rate": 1.4561570117125672e-05, |
|
"loss": 0.6712, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 43.02, |
|
"learning_rate": 1.4482431149097816e-05, |
|
"loss": 0.3681, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 43.02, |
|
"eval_accuracy": 0.7880184331797235, |
|
"eval_loss": 0.762200117111206, |
|
"eval_runtime": 254.5668, |
|
"eval_samples_per_second": 0.852, |
|
"eval_steps_per_second": 0.216, |
|
"step": 5192 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 1.440329218106996e-05, |
|
"loss": 0.4388, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 1.4324153213042102e-05, |
|
"loss": 0.4731, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 1.4245014245014246e-05, |
|
"loss": 0.4798, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 1.416587527698639e-05, |
|
"loss": 0.3783, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 1.408673630895853e-05, |
|
"loss": 0.2336, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 1.4007597340930676e-05, |
|
"loss": 0.3797, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 1.3928458372902817e-05, |
|
"loss": 0.4423, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 1.384931940487496e-05, |
|
"loss": 0.5598, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 1.3770180436847105e-05, |
|
"loss": 0.5773, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 1.3691041468819247e-05, |
|
"loss": 0.3113, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"learning_rate": 1.361190250079139e-05, |
|
"loss": 0.2841, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"learning_rate": 1.3532763532763535e-05, |
|
"loss": 1.0092, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"eval_accuracy": 0.7880184331797235, |
|
"eval_loss": 0.7931644916534424, |
|
"eval_runtime": 259.7621, |
|
"eval_samples_per_second": 0.835, |
|
"eval_steps_per_second": 0.212, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 1.3453624564735675e-05, |
|
"loss": 0.6327, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 1.3374485596707819e-05, |
|
"loss": 1.0639, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 1.3295346628679963e-05, |
|
"loss": 0.2824, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 1.3216207660652105e-05, |
|
"loss": 0.3033, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 1.313706869262425e-05, |
|
"loss": 0.5596, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 1.305792972459639e-05, |
|
"loss": 0.4285, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 1.2978790756568535e-05, |
|
"loss": 0.7668, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 1.289965178854068e-05, |
|
"loss": 0.6117, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 1.282051282051282e-05, |
|
"loss": 0.5597, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 1.2741373852484964e-05, |
|
"loss": 0.2782, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 45.02, |
|
"learning_rate": 1.2662234884457108e-05, |
|
"loss": 0.321, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 45.02, |
|
"eval_accuracy": 0.7373271889400922, |
|
"eval_loss": 0.9068748354911804, |
|
"eval_runtime": 259.5409, |
|
"eval_samples_per_second": 0.836, |
|
"eval_steps_per_second": 0.212, |
|
"step": 5428 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 1.258309591642925e-05, |
|
"loss": 0.5204, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 1.2503956948401394e-05, |
|
"loss": 0.5057, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 1.2424817980373536e-05, |
|
"loss": 0.5511, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 1.2345679012345678e-05, |
|
"loss": 0.6262, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 1.2266540044317822e-05, |
|
"loss": 0.3925, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 1.2187401076289966e-05, |
|
"loss": 0.5744, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 1.2108262108262108e-05, |
|
"loss": 0.4216, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 1.2029123140234252e-05, |
|
"loss": 0.6228, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 1.1949984172206395e-05, |
|
"loss": 0.3394, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 1.1870845204178538e-05, |
|
"loss": 0.7777, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 1.179170623615068e-05, |
|
"loss": 0.3025, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 46.02, |
|
"learning_rate": 1.1712567268122825e-05, |
|
"loss": 0.399, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 46.02, |
|
"eval_accuracy": 0.8110599078341014, |
|
"eval_loss": 0.6439275741577148, |
|
"eval_runtime": 263.8811, |
|
"eval_samples_per_second": 0.822, |
|
"eval_steps_per_second": 0.208, |
|
"step": 5546 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 1.1633428300094967e-05, |
|
"loss": 0.6139, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 1.155428933206711e-05, |
|
"loss": 0.4756, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 1.1475150364039255e-05, |
|
"loss": 0.4553, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 1.1396011396011397e-05, |
|
"loss": 0.4806, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 1.1316872427983539e-05, |
|
"loss": 0.6994, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 1.1237733459955681e-05, |
|
"loss": 0.6582, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 1.1158594491927827e-05, |
|
"loss": 0.241, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 1.107945552389997e-05, |
|
"loss": 0.6311, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 1.1000316555872111e-05, |
|
"loss": 0.8412, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 1.0921177587844255e-05, |
|
"loss": 0.2699, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 1.0842038619816398e-05, |
|
"loss": 0.3901, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 47.02, |
|
"learning_rate": 1.0762899651788542e-05, |
|
"loss": 0.3699, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 47.02, |
|
"eval_accuracy": 0.7695852534562212, |
|
"eval_loss": 0.7740164399147034, |
|
"eval_runtime": 266.667, |
|
"eval_samples_per_second": 0.814, |
|
"eval_steps_per_second": 0.206, |
|
"step": 5664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 1.0683760683760684e-05, |
|
"loss": 0.8451, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 1.0604621715732828e-05, |
|
"loss": 0.5939, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 1.052548274770497e-05, |
|
"loss": 0.7695, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 1.0446343779677114e-05, |
|
"loss": 0.5514, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 1.0367204811649256e-05, |
|
"loss": 0.4196, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 1.02880658436214e-05, |
|
"loss": 0.2501, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 1.0208926875593542e-05, |
|
"loss": 0.6282, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 1.0129787907565686e-05, |
|
"loss": 0.5634, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 1.005064893953783e-05, |
|
"loss": 0.3623, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 9.971509971509972e-06, |
|
"loss": 0.4048, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"learning_rate": 9.892371003482115e-06, |
|
"loss": 0.5855, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"learning_rate": 9.813232035454257e-06, |
|
"loss": 0.4297, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"eval_accuracy": 0.8248847926267281, |
|
"eval_loss": 0.6810868978500366, |
|
"eval_runtime": 261.851, |
|
"eval_samples_per_second": 0.829, |
|
"eval_steps_per_second": 0.21, |
|
"step": 5782 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 9.734093067426402e-06, |
|
"loss": 0.3992, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 9.654954099398545e-06, |
|
"loss": 1.0214, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 9.575815131370687e-06, |
|
"loss": 0.4853, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 9.49667616334283e-06, |
|
"loss": 0.2907, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 9.417537195314973e-06, |
|
"loss": 0.4338, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 9.338398227287117e-06, |
|
"loss": 0.3648, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.8034, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 9.180120291231403e-06, |
|
"loss": 0.7394, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 9.100981323203545e-06, |
|
"loss": 0.494, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 9.02184235517569e-06, |
|
"loss": 0.9404, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"learning_rate": 8.942703387147831e-06, |
|
"loss": 0.4178, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"learning_rate": 8.863564419119975e-06, |
|
"loss": 0.2783, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"eval_accuracy": 0.8525345622119815, |
|
"eval_loss": 0.586846113204956, |
|
"eval_runtime": 260.0845, |
|
"eval_samples_per_second": 0.834, |
|
"eval_steps_per_second": 0.211, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 8.784425451092118e-06, |
|
"loss": 0.4036, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 8.705286483064262e-06, |
|
"loss": 0.6644, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 8.626147515036405e-06, |
|
"loss": 0.2605, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"learning_rate": 8.547008547008548e-06, |
|
"loss": 0.6677, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"learning_rate": 8.46786957898069e-06, |
|
"loss": 0.4203, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"learning_rate": 8.388730610952832e-06, |
|
"loss": 0.286, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"learning_rate": 8.309591642924978e-06, |
|
"loss": 0.5586, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"learning_rate": 8.23045267489712e-06, |
|
"loss": 0.5642, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"learning_rate": 8.151313706869262e-06, |
|
"loss": 0.583, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"learning_rate": 8.072174738841406e-06, |
|
"loss": 0.2579, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 50.02, |
|
"learning_rate": 7.993035770813548e-06, |
|
"loss": 0.4946, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 50.02, |
|
"eval_accuracy": 0.7926267281105991, |
|
"eval_loss": 0.673189103603363, |
|
"eval_runtime": 263.3409, |
|
"eval_samples_per_second": 0.824, |
|
"eval_steps_per_second": 0.209, |
|
"step": 6018 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 7.913896802785692e-06, |
|
"loss": 0.2915, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 7.834757834757835e-06, |
|
"loss": 0.4099, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 7.755618866729978e-06, |
|
"loss": 0.3578, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 7.67647989870212e-06, |
|
"loss": 0.6813, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 51.01, |
|
"learning_rate": 7.597340930674264e-06, |
|
"loss": 0.7868, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 51.01, |
|
"learning_rate": 7.5182019626464085e-06, |
|
"loss": 0.3421, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 51.01, |
|
"learning_rate": 7.439062994618551e-06, |
|
"loss": 0.3972, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 51.01, |
|
"learning_rate": 7.359924026590694e-06, |
|
"loss": 0.4223, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 51.01, |
|
"learning_rate": 7.280785058562836e-06, |
|
"loss": 0.4222, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 51.01, |
|
"learning_rate": 7.20164609053498e-06, |
|
"loss": 0.5646, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 51.01, |
|
"learning_rate": 7.122507122507123e-06, |
|
"loss": 0.5948, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 51.02, |
|
"learning_rate": 7.043368154479265e-06, |
|
"loss": 0.3058, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 51.02, |
|
"eval_accuracy": 0.8341013824884793, |
|
"eval_loss": 0.551148533821106, |
|
"eval_runtime": 267.39, |
|
"eval_samples_per_second": 0.812, |
|
"eval_steps_per_second": 0.206, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 6.964229186451408e-06, |
|
"loss": 0.4489, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 6.885090218423552e-06, |
|
"loss": 0.6678, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 6.805951250395695e-06, |
|
"loss": 0.4425, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 6.726812282367838e-06, |
|
"loss": 0.4, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"learning_rate": 6.6476733143399815e-06, |
|
"loss": 0.6218, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"learning_rate": 6.568534346312125e-06, |
|
"loss": 0.4908, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"learning_rate": 6.489395378284268e-06, |
|
"loss": 0.4602, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"learning_rate": 6.41025641025641e-06, |
|
"loss": 0.3036, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"learning_rate": 6.331117442228554e-06, |
|
"loss": 0.229, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"learning_rate": 6.251978474200697e-06, |
|
"loss": 0.5506, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"learning_rate": 6.172839506172839e-06, |
|
"loss": 0.201, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 52.02, |
|
"learning_rate": 6.093700538144983e-06, |
|
"loss": 0.1286, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 52.02, |
|
"eval_accuracy": 0.8294930875576036, |
|
"eval_loss": 0.5877251029014587, |
|
"eval_runtime": 267.204, |
|
"eval_samples_per_second": 0.812, |
|
"eval_steps_per_second": 0.206, |
|
"step": 6254 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 6.014561570117126e-06, |
|
"loss": 0.5248, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 5.935422602089269e-06, |
|
"loss": 0.5, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 5.856283634061412e-06, |
|
"loss": 0.484, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"learning_rate": 5.777144666033555e-06, |
|
"loss": 0.4989, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"learning_rate": 5.6980056980056985e-06, |
|
"loss": 0.71, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"learning_rate": 5.618866729977841e-06, |
|
"loss": 0.8, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"learning_rate": 5.539727761949985e-06, |
|
"loss": 0.5235, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"learning_rate": 5.460588793922128e-06, |
|
"loss": 0.9647, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"learning_rate": 5.381449825894271e-06, |
|
"loss": 0.3382, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"learning_rate": 5.302310857866414e-06, |
|
"loss": 0.4008, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 53.02, |
|
"learning_rate": 5.223171889838557e-06, |
|
"loss": 0.5169, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 53.02, |
|
"learning_rate": 5.1440329218107e-06, |
|
"loss": 0.2013, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 53.02, |
|
"eval_accuracy": 0.815668202764977, |
|
"eval_loss": 0.6507552266120911, |
|
"eval_runtime": 274.5228, |
|
"eval_samples_per_second": 0.79, |
|
"eval_steps_per_second": 0.2, |
|
"step": 6372 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 5.064893953782843e-06, |
|
"loss": 1.0097, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 4.985754985754986e-06, |
|
"loss": 0.2331, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 4.906616017727128e-06, |
|
"loss": 0.388, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"learning_rate": 4.827477049699272e-06, |
|
"loss": 0.4358, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"learning_rate": 4.748338081671415e-06, |
|
"loss": 0.4098, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"learning_rate": 4.6691991136435585e-06, |
|
"loss": 0.3491, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"learning_rate": 4.5900601456157015e-06, |
|
"loss": 0.2839, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"learning_rate": 4.510921177587845e-06, |
|
"loss": 0.1628, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"learning_rate": 4.431782209559988e-06, |
|
"loss": 1.0689, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"learning_rate": 4.352643241532131e-06, |
|
"loss": 0.6193, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 54.02, |
|
"learning_rate": 4.273504273504274e-06, |
|
"loss": 0.7058, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 54.02, |
|
"learning_rate": 4.194365305476416e-06, |
|
"loss": 0.2027, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 54.02, |
|
"eval_accuracy": 0.815668202764977, |
|
"eval_loss": 0.6629670858383179, |
|
"eval_runtime": 274.2142, |
|
"eval_samples_per_second": 0.791, |
|
"eval_steps_per_second": 0.201, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 4.11522633744856e-06, |
|
"loss": 0.4227, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 4.036087369420703e-06, |
|
"loss": 0.8382, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 3.956948401392846e-06, |
|
"loss": 0.5188, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"learning_rate": 3.877809433364989e-06, |
|
"loss": 0.4906, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"learning_rate": 3.798670465337132e-06, |
|
"loss": 0.4729, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"learning_rate": 3.7195314973092754e-06, |
|
"loss": 0.6123, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"learning_rate": 3.640392529281418e-06, |
|
"loss": 0.5144, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"learning_rate": 3.5612535612535615e-06, |
|
"loss": 0.863, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"learning_rate": 3.482114593225704e-06, |
|
"loss": 0.4071, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"learning_rate": 3.4029756251978477e-06, |
|
"loss": 0.5755, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 55.02, |
|
"learning_rate": 3.3238366571699908e-06, |
|
"loss": 0.6267, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 55.02, |
|
"eval_accuracy": 0.8064516129032258, |
|
"eval_loss": 0.7372620701789856, |
|
"eval_runtime": 267.3033, |
|
"eval_samples_per_second": 0.812, |
|
"eval_steps_per_second": 0.206, |
|
"step": 6608 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 3.244697689142134e-06, |
|
"loss": 0.3038, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 3.165558721114277e-06, |
|
"loss": 0.6753, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 3.0864197530864196e-06, |
|
"loss": 0.4761, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 3.007280785058563e-06, |
|
"loss": 0.4697, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"learning_rate": 2.928141817030706e-06, |
|
"loss": 0.2215, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"learning_rate": 2.8490028490028492e-06, |
|
"loss": 0.4558, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"learning_rate": 2.7698638809749923e-06, |
|
"loss": 0.7401, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"learning_rate": 2.6907249129471354e-06, |
|
"loss": 0.5682, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"learning_rate": 2.6115859449192785e-06, |
|
"loss": 0.5346, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"learning_rate": 2.5324469768914215e-06, |
|
"loss": 0.7019, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"learning_rate": 2.453308008863564e-06, |
|
"loss": 0.4813, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 56.02, |
|
"learning_rate": 2.3741690408357077e-06, |
|
"loss": 0.4561, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 56.02, |
|
"eval_accuracy": 0.8018433179723502, |
|
"eval_loss": 0.7382919192314148, |
|
"eval_runtime": 275.3448, |
|
"eval_samples_per_second": 0.788, |
|
"eval_steps_per_second": 0.2, |
|
"step": 6726 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 2.2950300728078508e-06, |
|
"loss": 0.4686, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 2.215891104779994e-06, |
|
"loss": 0.3041, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 2.136752136752137e-06, |
|
"loss": 0.241, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 2.05761316872428e-06, |
|
"loss": 0.4126, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"learning_rate": 1.978474200696423e-06, |
|
"loss": 0.286, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"learning_rate": 1.899335232668566e-06, |
|
"loss": 0.582, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"learning_rate": 1.820196264640709e-06, |
|
"loss": 0.3814, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"learning_rate": 1.741057296612852e-06, |
|
"loss": 0.6788, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"learning_rate": 1.6619183285849954e-06, |
|
"loss": 0.44, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"learning_rate": 1.5827793605571385e-06, |
|
"loss": 0.4996, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"learning_rate": 1.5036403925292815e-06, |
|
"loss": 0.2839, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 57.02, |
|
"learning_rate": 1.4245014245014246e-06, |
|
"loss": 0.7002, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 57.02, |
|
"eval_accuracy": 0.8110599078341014, |
|
"eval_loss": 0.7072679400444031, |
|
"eval_runtime": 266.757, |
|
"eval_samples_per_second": 0.813, |
|
"eval_steps_per_second": 0.206, |
|
"step": 6844 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 1.3453624564735677e-06, |
|
"loss": 0.215, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 1.2662234884457108e-06, |
|
"loss": 0.6569, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 1.1870845204178538e-06, |
|
"loss": 0.6116, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"learning_rate": 1.107945552389997e-06, |
|
"loss": 0.3416, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"learning_rate": 1.02880658436214e-06, |
|
"loss": 0.571, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"learning_rate": 9.49667616334283e-07, |
|
"loss": 0.6238, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"learning_rate": 8.70528648306426e-07, |
|
"loss": 0.346, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"learning_rate": 7.913896802785692e-07, |
|
"loss": 0.4527, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"learning_rate": 7.122507122507123e-07, |
|
"loss": 0.5342, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"learning_rate": 6.331117442228554e-07, |
|
"loss": 0.523, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 58.02, |
|
"learning_rate": 5.539727761949985e-07, |
|
"loss": 0.2861, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 58.02, |
|
"learning_rate": 4.748338081671415e-07, |
|
"loss": 0.1823, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 58.02, |
|
"eval_accuracy": 0.8202764976958525, |
|
"eval_loss": 0.6870871186256409, |
|
"eval_runtime": 266.4451, |
|
"eval_samples_per_second": 0.814, |
|
"eval_steps_per_second": 0.206, |
|
"step": 6962 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 3.956948401392846e-07, |
|
"loss": 0.5552, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 3.165558721114277e-07, |
|
"loss": 0.4444, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 2.3741690408357074e-07, |
|
"loss": 0.4752, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"learning_rate": 1.5827793605571385e-07, |
|
"loss": 0.176, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"learning_rate": 7.913896802785692e-08, |
|
"loss": 0.8408, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"learning_rate": 0.0, |
|
"loss": 0.2439, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"eval_accuracy": 0.8202764976958525, |
|
"eval_loss": 0.690118670463562, |
|
"eval_runtime": 307.1421, |
|
"eval_samples_per_second": 0.707, |
|
"eval_steps_per_second": 0.179, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"step": 7020, |
|
"total_flos": 3.476987046291161e+19, |
|
"train_loss": 0.6254725841715125, |
|
"train_runtime": 65940.1985, |
|
"train_samples_per_second": 0.426, |
|
"train_steps_per_second": 0.106 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"eval_accuracy": 0.6990740740740741, |
|
"eval_loss": 1.074745774269104, |
|
"eval_runtime": 265.3059, |
|
"eval_samples_per_second": 0.814, |
|
"eval_steps_per_second": 0.204, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"eval_accuracy": 0.6990740740740741, |
|
"eval_loss": 1.074745774269104, |
|
"eval_runtime": 283.9645, |
|
"eval_samples_per_second": 0.761, |
|
"eval_steps_per_second": 0.19, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"eval_accuracy": 0.6990740740740741, |
|
"eval_loss": 1.0747456550598145, |
|
"eval_runtime": 273.0941, |
|
"eval_samples_per_second": 0.791, |
|
"eval_steps_per_second": 0.198, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"eval_accuracy": 0.6990740740740741, |
|
"eval_loss": 1.074745774269104, |
|
"eval_runtime": 320.3996, |
|
"eval_samples_per_second": 0.674, |
|
"eval_steps_per_second": 0.169, |
|
"step": 7020 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 7020, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 3.476987046291161e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|