{ "best_metric": 0.8525345622119815, "best_model_checkpoint": "videomae-base-finetuned-subset\\checkpoint-5900", "epoch": 59.00826210826211, "eval_steps": 500, "global_step": 7020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 7.122507122507123e-07, "loss": 1.1155, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.4245014245014246e-06, "loss": 1.1405, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.136752136752137e-06, "loss": 0.5289, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.8490028490028492e-06, "loss": 0.8832, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.5612535612535615e-06, "loss": 1.0172, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.273504273504274e-06, "loss": 0.7363, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.985754985754986e-06, "loss": 0.8444, "step": 70 }, { "epoch": 0.01, "learning_rate": 5.6980056980056985e-06, "loss": 0.8984, "step": 80 }, { "epoch": 0.01, "learning_rate": 6.41025641025641e-06, "loss": 1.085, "step": 90 }, { "epoch": 0.01, "learning_rate": 7.122507122507123e-06, "loss": 0.7478, "step": 100 }, { "epoch": 0.02, "learning_rate": 7.834757834757835e-06, "loss": 0.9225, "step": 110 }, { "epoch": 0.02, "eval_accuracy": 0.7419354838709677, "eval_loss": 0.7362823486328125, "eval_runtime": 289.676, "eval_samples_per_second": 0.749, "eval_steps_per_second": 0.19, "step": 118 }, { "epoch": 1.0, "learning_rate": 8.547008547008548e-06, "loss": 0.8071, "step": 120 }, { "epoch": 1.0, "learning_rate": 9.259259259259259e-06, "loss": 0.4488, "step": 130 }, { "epoch": 1.0, "learning_rate": 9.971509971509972e-06, "loss": 1.23, "step": 140 }, { "epoch": 1.0, "learning_rate": 1.0683760683760684e-05, "loss": 0.7655, "step": 150 }, { "epoch": 1.01, "learning_rate": 1.1396011396011397e-05, "loss": 0.8764, "step": 160 }, { "epoch": 1.01, "learning_rate": 1.2108262108262108e-05, "loss": 0.5023, "step": 170 }, { "epoch": 1.01, "learning_rate": 1.282051282051282e-05, "loss": 0.5699, "step": 180 }, { "epoch": 1.01, "learning_rate": 1.3532763532763535e-05, "loss": 0.8942, "step": 190 }, { "epoch": 1.01, "learning_rate": 1.4245014245014246e-05, "loss": 0.7764, "step": 200 }, { "epoch": 1.01, "learning_rate": 1.4957264957264958e-05, "loss": 0.8046, "step": 210 }, { "epoch": 1.01, "learning_rate": 1.566951566951567e-05, "loss": 0.5461, "step": 220 }, { "epoch": 1.02, "learning_rate": 1.6381766381766382e-05, "loss": 0.8357, "step": 230 }, { "epoch": 1.02, "eval_accuracy": 0.7004608294930875, "eval_loss": 0.911893367767334, "eval_runtime": 306.0217, "eval_samples_per_second": 0.709, "eval_steps_per_second": 0.18, "step": 236 }, { "epoch": 2.0, "learning_rate": 1.7094017094017095e-05, "loss": 0.575, "step": 240 }, { "epoch": 2.0, "learning_rate": 1.7806267806267805e-05, "loss": 0.7452, "step": 250 }, { "epoch": 2.0, "learning_rate": 1.8518518518518518e-05, "loss": 0.7453, "step": 260 }, { "epoch": 2.0, "learning_rate": 1.923076923076923e-05, "loss": 0.5861, "step": 270 }, { "epoch": 2.01, "learning_rate": 1.9943019943019945e-05, "loss": 0.4319, "step": 280 }, { "epoch": 2.01, "learning_rate": 2.0655270655270654e-05, "loss": 0.532, "step": 290 }, { "epoch": 2.01, "learning_rate": 2.1367521367521368e-05, "loss": 0.8728, "step": 300 }, { "epoch": 2.01, "learning_rate": 2.207977207977208e-05, "loss": 1.1116, "step": 310 }, { "epoch": 2.01, "learning_rate": 2.2792022792022794e-05, "loss": 0.9775, "step": 320 }, { "epoch": 2.01, "learning_rate": 2.3504273504273504e-05, "loss": 1.1248, "step": 330 }, { "epoch": 2.01, "learning_rate": 2.4216524216524217e-05, "loss": 0.7516, "step": 340 }, { "epoch": 2.02, "learning_rate": 2.492877492877493e-05, "loss": 0.474, "step": 350 }, { "epoch": 2.02, "eval_accuracy": 0.6820276497695853, "eval_loss": 0.9698442220687866, "eval_runtime": 313.4651, "eval_samples_per_second": 0.692, "eval_steps_per_second": 0.175, "step": 354 }, { "epoch": 3.0, "learning_rate": 2.564102564102564e-05, "loss": 0.999, "step": 360 }, { "epoch": 3.0, "learning_rate": 2.6353276353276356e-05, "loss": 0.758, "step": 370 }, { "epoch": 3.0, "learning_rate": 2.706552706552707e-05, "loss": 0.6963, "step": 380 }, { "epoch": 3.01, "learning_rate": 2.777777777777778e-05, "loss": 0.5529, "step": 390 }, { "epoch": 3.01, "learning_rate": 2.8490028490028492e-05, "loss": 0.7653, "step": 400 }, { "epoch": 3.01, "learning_rate": 2.9202279202279202e-05, "loss": 0.8444, "step": 410 }, { "epoch": 3.01, "learning_rate": 2.9914529914529915e-05, "loss": 0.6609, "step": 420 }, { "epoch": 3.01, "learning_rate": 3.0626780626780625e-05, "loss": 0.5681, "step": 430 }, { "epoch": 3.01, "learning_rate": 3.133903133903134e-05, "loss": 0.7641, "step": 440 }, { "epoch": 3.01, "learning_rate": 3.205128205128206e-05, "loss": 0.7485, "step": 450 }, { "epoch": 3.02, "learning_rate": 3.2763532763532764e-05, "loss": 0.7458, "step": 460 }, { "epoch": 3.02, "learning_rate": 3.347578347578348e-05, "loss": 0.7899, "step": 470 }, { "epoch": 3.02, "eval_accuracy": 0.6774193548387096, "eval_loss": 1.135077714920044, "eval_runtime": 285.9321, "eval_samples_per_second": 0.759, "eval_steps_per_second": 0.192, "step": 472 }, { "epoch": 4.0, "learning_rate": 3.418803418803419e-05, "loss": 0.4874, "step": 480 }, { "epoch": 4.0, "learning_rate": 3.4900284900284904e-05, "loss": 0.6289, "step": 490 }, { "epoch": 4.0, "learning_rate": 3.561253561253561e-05, "loss": 0.6611, "step": 500 }, { "epoch": 4.01, "learning_rate": 3.6324786324786323e-05, "loss": 0.7506, "step": 510 }, { "epoch": 4.01, "learning_rate": 3.7037037037037037e-05, "loss": 0.777, "step": 520 }, { "epoch": 4.01, "learning_rate": 3.774928774928775e-05, "loss": 0.9948, "step": 530 }, { "epoch": 4.01, "learning_rate": 3.846153846153846e-05, "loss": 0.704, "step": 540 }, { "epoch": 4.01, "learning_rate": 3.9173789173789176e-05, "loss": 0.8743, "step": 550 }, { "epoch": 4.01, "learning_rate": 3.988603988603989e-05, "loss": 0.6572, "step": 560 }, { "epoch": 4.01, "learning_rate": 4.05982905982906e-05, "loss": 1.0614, "step": 570 }, { "epoch": 4.02, "learning_rate": 4.131054131054131e-05, "loss": 0.6374, "step": 580 }, { "epoch": 4.02, "learning_rate": 4.202279202279202e-05, "loss": 0.9015, "step": 590 }, { "epoch": 4.02, "eval_accuracy": 0.4976958525345622, "eval_loss": 1.3822749853134155, "eval_runtime": 285.2486, "eval_samples_per_second": 0.761, "eval_steps_per_second": 0.193, "step": 590 }, { "epoch": 5.0, "learning_rate": 4.2735042735042735e-05, "loss": 1.1476, "step": 600 }, { "epoch": 5.0, "learning_rate": 4.344729344729345e-05, "loss": 0.7482, "step": 610 }, { "epoch": 5.0, "learning_rate": 4.415954415954416e-05, "loss": 0.835, "step": 620 }, { "epoch": 5.01, "learning_rate": 4.4871794871794874e-05, "loss": 0.5413, "step": 630 }, { "epoch": 5.01, "learning_rate": 4.558404558404559e-05, "loss": 0.9714, "step": 640 }, { "epoch": 5.01, "learning_rate": 4.62962962962963e-05, "loss": 0.4952, "step": 650 }, { "epoch": 5.01, "learning_rate": 4.700854700854701e-05, "loss": 1.0349, "step": 660 }, { "epoch": 5.01, "learning_rate": 4.772079772079772e-05, "loss": 0.8882, "step": 670 }, { "epoch": 5.01, "learning_rate": 4.8433048433048433e-05, "loss": 0.9244, "step": 680 }, { "epoch": 5.01, "learning_rate": 4.9145299145299147e-05, "loss": 1.1217, "step": 690 }, { "epoch": 5.02, "learning_rate": 4.985754985754986e-05, "loss": 0.7402, "step": 700 }, { "epoch": 5.02, "eval_accuracy": 0.695852534562212, "eval_loss": 0.8660895824432373, "eval_runtime": 289.3403, "eval_samples_per_second": 0.75, "eval_steps_per_second": 0.19, "step": 708 }, { "epoch": 6.0, "learning_rate": 4.993668882557772e-05, "loss": 0.6064, "step": 710 }, { "epoch": 6.0, "learning_rate": 4.985754985754986e-05, "loss": 0.6741, "step": 720 }, { "epoch": 6.0, "learning_rate": 4.9778410889522e-05, "loss": 0.7149, "step": 730 }, { "epoch": 6.0, "learning_rate": 4.9699271921494144e-05, "loss": 0.9701, "step": 740 }, { "epoch": 6.01, "learning_rate": 4.962013295346629e-05, "loss": 0.6009, "step": 750 }, { "epoch": 6.01, "learning_rate": 4.9540993985438435e-05, "loss": 1.3047, "step": 760 }, { "epoch": 6.01, "learning_rate": 4.946185501741058e-05, "loss": 0.7303, "step": 770 }, { "epoch": 6.01, "learning_rate": 4.938271604938271e-05, "loss": 0.7054, "step": 780 }, { "epoch": 6.01, "learning_rate": 4.930357708135486e-05, "loss": 0.884, "step": 790 }, { "epoch": 6.01, "learning_rate": 4.9224438113327004e-05, "loss": 0.7226, "step": 800 }, { "epoch": 6.01, "learning_rate": 4.9145299145299147e-05, "loss": 0.8143, "step": 810 }, { "epoch": 6.02, "learning_rate": 4.906616017727129e-05, "loss": 0.6343, "step": 820 }, { "epoch": 6.02, "eval_accuracy": 0.7004608294930875, "eval_loss": 0.668888509273529, "eval_runtime": 284.8469, "eval_samples_per_second": 0.762, "eval_steps_per_second": 0.193, "step": 826 }, { "epoch": 7.0, "learning_rate": 4.898702120924343e-05, "loss": 0.8134, "step": 830 }, { "epoch": 7.0, "learning_rate": 4.890788224121557e-05, "loss": 0.664, "step": 840 }, { "epoch": 7.0, "learning_rate": 4.882874327318772e-05, "loss": 0.7448, "step": 850 }, { "epoch": 7.0, "learning_rate": 4.8749604305159865e-05, "loss": 0.6677, "step": 860 }, { "epoch": 7.01, "learning_rate": 4.867046533713201e-05, "loss": 1.0314, "step": 870 }, { "epoch": 7.01, "learning_rate": 4.859132636910415e-05, "loss": 0.6049, "step": 880 }, { "epoch": 7.01, "learning_rate": 4.851218740107629e-05, "loss": 1.3879, "step": 890 }, { "epoch": 7.01, "learning_rate": 4.8433048433048433e-05, "loss": 0.7445, "step": 900 }, { "epoch": 7.01, "learning_rate": 4.835390946502058e-05, "loss": 0.7087, "step": 910 }, { "epoch": 7.01, "learning_rate": 4.8274770496992725e-05, "loss": 0.5025, "step": 920 }, { "epoch": 7.01, "learning_rate": 4.819563152896486e-05, "loss": 0.6759, "step": 930 }, { "epoch": 7.02, "learning_rate": 4.811649256093701e-05, "loss": 0.7427, "step": 940 }, { "epoch": 7.02, "eval_accuracy": 0.6728110599078341, "eval_loss": 0.9108946919441223, "eval_runtime": 320.8735, "eval_samples_per_second": 0.676, "eval_steps_per_second": 0.171, "step": 944 }, { "epoch": 8.0, "learning_rate": 4.803735359290915e-05, "loss": 0.9541, "step": 950 }, { "epoch": 8.0, "learning_rate": 4.7958214624881294e-05, "loss": 0.9822, "step": 960 }, { "epoch": 8.0, "learning_rate": 4.787907565685344e-05, "loss": 0.9233, "step": 970 }, { "epoch": 8.01, "learning_rate": 4.779993668882558e-05, "loss": 0.9739, "step": 980 }, { "epoch": 8.01, "learning_rate": 4.772079772079772e-05, "loss": 0.966, "step": 990 }, { "epoch": 8.01, "learning_rate": 4.764165875276987e-05, "loss": 0.8516, "step": 1000 }, { "epoch": 8.01, "learning_rate": 4.756251978474201e-05, "loss": 0.6187, "step": 1010 }, { "epoch": 8.01, "learning_rate": 4.7483380816714154e-05, "loss": 1.2194, "step": 1020 }, { "epoch": 8.01, "learning_rate": 4.7404241848686296e-05, "loss": 1.1313, "step": 1030 }, { "epoch": 8.01, "learning_rate": 4.732510288065844e-05, "loss": 0.7851, "step": 1040 }, { "epoch": 8.02, "learning_rate": 4.724596391263058e-05, "loss": 0.8549, "step": 1050 }, { "epoch": 8.02, "learning_rate": 4.716682494460272e-05, "loss": 0.5898, "step": 1060 }, { "epoch": 8.02, "eval_accuracy": 0.5944700460829493, "eval_loss": 1.0126854181289673, "eval_runtime": 324.9051, "eval_samples_per_second": 0.668, "eval_steps_per_second": 0.169, "step": 1062 }, { "epoch": 9.0, "learning_rate": 4.708768597657487e-05, "loss": 0.4566, "step": 1070 }, { "epoch": 9.0, "learning_rate": 4.700854700854701e-05, "loss": 0.9513, "step": 1080 }, { "epoch": 9.0, "learning_rate": 4.692940804051915e-05, "loss": 0.3735, "step": 1090 }, { "epoch": 9.01, "learning_rate": 4.68502690724913e-05, "loss": 0.7034, "step": 1100 }, { "epoch": 9.01, "learning_rate": 4.677113010446344e-05, "loss": 0.6437, "step": 1110 }, { "epoch": 9.01, "learning_rate": 4.669199113643558e-05, "loss": 0.8746, "step": 1120 }, { "epoch": 9.01, "learning_rate": 4.6612852168407725e-05, "loss": 0.6746, "step": 1130 }, { "epoch": 9.01, "learning_rate": 4.653371320037987e-05, "loss": 0.9281, "step": 1140 }, { "epoch": 9.01, "learning_rate": 4.645457423235201e-05, "loss": 0.9406, "step": 1150 }, { "epoch": 9.01, "learning_rate": 4.637543526432416e-05, "loss": 1.0328, "step": 1160 }, { "epoch": 9.02, "learning_rate": 4.62962962962963e-05, "loss": 0.8338, "step": 1170 }, { "epoch": 9.02, "learning_rate": 4.621715732826844e-05, "loss": 0.6258, "step": 1180 }, { "epoch": 9.02, "eval_accuracy": 0.7235023041474654, "eval_loss": 0.7130948305130005, "eval_runtime": 315.7938, "eval_samples_per_second": 0.687, "eval_steps_per_second": 0.174, "step": 1180 }, { "epoch": 10.0, "learning_rate": 4.6138018360240585e-05, "loss": 0.635, "step": 1190 }, { "epoch": 10.0, "learning_rate": 4.605887939221273e-05, "loss": 0.8311, "step": 1200 }, { "epoch": 10.0, "learning_rate": 4.597974042418487e-05, "loss": 1.2343, "step": 1210 }, { "epoch": 10.01, "learning_rate": 4.590060145615702e-05, "loss": 0.8252, "step": 1220 }, { "epoch": 10.01, "learning_rate": 4.582146248812916e-05, "loss": 0.7481, "step": 1230 }, { "epoch": 10.01, "learning_rate": 4.5742323520101296e-05, "loss": 0.4203, "step": 1240 }, { "epoch": 10.01, "learning_rate": 4.5663184552073445e-05, "loss": 0.5636, "step": 1250 }, { "epoch": 10.01, "learning_rate": 4.558404558404559e-05, "loss": 0.5062, "step": 1260 }, { "epoch": 10.01, "learning_rate": 4.550490661601773e-05, "loss": 0.5214, "step": 1270 }, { "epoch": 10.01, "learning_rate": 4.542576764798987e-05, "loss": 0.6217, "step": 1280 }, { "epoch": 10.02, "learning_rate": 4.5346628679962014e-05, "loss": 0.9957, "step": 1290 }, { "epoch": 10.02, "eval_accuracy": 0.6728110599078341, "eval_loss": 0.9507045745849609, "eval_runtime": 315.2197, "eval_samples_per_second": 0.688, "eval_steps_per_second": 0.174, "step": 1298 }, { "epoch": 11.0, "learning_rate": 4.5267489711934157e-05, "loss": 0.5816, "step": 1300 }, { "epoch": 11.0, "learning_rate": 4.51883507439063e-05, "loss": 0.8868, "step": 1310 }, { "epoch": 11.0, "learning_rate": 4.510921177587845e-05, "loss": 0.7216, "step": 1320 }, { "epoch": 11.0, "learning_rate": 4.503007280785059e-05, "loss": 0.5962, "step": 1330 }, { "epoch": 11.01, "learning_rate": 4.4950933839822725e-05, "loss": 1.2384, "step": 1340 }, { "epoch": 11.01, "learning_rate": 4.4871794871794874e-05, "loss": 0.8794, "step": 1350 }, { "epoch": 11.01, "learning_rate": 4.479265590376702e-05, "loss": 0.8364, "step": 1360 }, { "epoch": 11.01, "learning_rate": 4.471351693573916e-05, "loss": 0.8106, "step": 1370 }, { "epoch": 11.01, "learning_rate": 4.463437796771131e-05, "loss": 0.7839, "step": 1380 }, { "epoch": 11.01, "learning_rate": 4.455523899968344e-05, "loss": 0.7032, "step": 1390 }, { "epoch": 11.01, "learning_rate": 4.4476100031655586e-05, "loss": 0.9424, "step": 1400 }, { "epoch": 11.02, "learning_rate": 4.4396961063627735e-05, "loss": 0.401, "step": 1410 }, { "epoch": 11.02, "eval_accuracy": 0.7188940092165899, "eval_loss": 0.6258705258369446, "eval_runtime": 308.0396, "eval_samples_per_second": 0.704, "eval_steps_per_second": 0.179, "step": 1416 }, { "epoch": 12.0, "learning_rate": 4.431782209559988e-05, "loss": 0.4381, "step": 1420 }, { "epoch": 12.0, "learning_rate": 4.423868312757202e-05, "loss": 0.5723, "step": 1430 }, { "epoch": 12.0, "learning_rate": 4.415954415954416e-05, "loss": 0.5368, "step": 1440 }, { "epoch": 12.0, "learning_rate": 4.4080405191516304e-05, "loss": 1.065, "step": 1450 }, { "epoch": 12.01, "learning_rate": 4.4001266223488446e-05, "loss": 0.5411, "step": 1460 }, { "epoch": 12.01, "learning_rate": 4.3922127255460595e-05, "loss": 1.1746, "step": 1470 }, { "epoch": 12.01, "learning_rate": 4.384298828743274e-05, "loss": 0.8506, "step": 1480 }, { "epoch": 12.01, "learning_rate": 4.376384931940488e-05, "loss": 0.9014, "step": 1490 }, { "epoch": 12.01, "learning_rate": 4.368471035137702e-05, "loss": 0.7609, "step": 1500 }, { "epoch": 12.01, "learning_rate": 4.3605571383349164e-05, "loss": 0.6263, "step": 1510 }, { "epoch": 12.01, "learning_rate": 4.3526432415321306e-05, "loss": 1.1741, "step": 1520 }, { "epoch": 12.02, "learning_rate": 4.344729344729345e-05, "loss": 0.5422, "step": 1530 }, { "epoch": 12.02, "eval_accuracy": 0.6774193548387096, "eval_loss": 0.9453245997428894, "eval_runtime": 323.3925, "eval_samples_per_second": 0.671, "eval_steps_per_second": 0.17, "step": 1534 }, { "epoch": 13.0, "learning_rate": 4.336815447926559e-05, "loss": 0.607, "step": 1540 }, { "epoch": 13.0, "learning_rate": 4.328901551123773e-05, "loss": 0.7626, "step": 1550 }, { "epoch": 13.0, "learning_rate": 4.3209876543209875e-05, "loss": 0.4373, "step": 1560 }, { "epoch": 13.01, "learning_rate": 4.3130737575182024e-05, "loss": 0.5643, "step": 1570 }, { "epoch": 13.01, "learning_rate": 4.3051598607154166e-05, "loss": 0.7982, "step": 1580 }, { "epoch": 13.01, "learning_rate": 4.297245963912631e-05, "loss": 0.971, "step": 1590 }, { "epoch": 13.01, "learning_rate": 4.289332067109845e-05, "loss": 0.9764, "step": 1600 }, { "epoch": 13.01, "learning_rate": 4.281418170307059e-05, "loss": 0.8366, "step": 1610 }, { "epoch": 13.01, "learning_rate": 4.2735042735042735e-05, "loss": 1.0652, "step": 1620 }, { "epoch": 13.01, "learning_rate": 4.2655903767014884e-05, "loss": 0.7619, "step": 1630 }, { "epoch": 13.02, "learning_rate": 4.2576764798987026e-05, "loss": 0.639, "step": 1640 }, { "epoch": 13.02, "learning_rate": 4.249762583095916e-05, "loss": 0.6852, "step": 1650 }, { "epoch": 13.02, "eval_accuracy": 0.7004608294930875, "eval_loss": 0.8649422526359558, "eval_runtime": 301.7734, "eval_samples_per_second": 0.719, "eval_steps_per_second": 0.182, "step": 1652 }, { "epoch": 14.0, "learning_rate": 4.241848686293131e-05, "loss": 0.9047, "step": 1660 }, { "epoch": 14.0, "learning_rate": 4.233934789490345e-05, "loss": 1.1309, "step": 1670 }, { "epoch": 14.0, "learning_rate": 4.2260208926875595e-05, "loss": 0.7564, "step": 1680 }, { "epoch": 14.01, "learning_rate": 4.2181069958847744e-05, "loss": 0.6608, "step": 1690 }, { "epoch": 14.01, "learning_rate": 4.210193099081988e-05, "loss": 0.5688, "step": 1700 }, { "epoch": 14.01, "learning_rate": 4.202279202279202e-05, "loss": 0.6834, "step": 1710 }, { "epoch": 14.01, "learning_rate": 4.194365305476417e-05, "loss": 0.7658, "step": 1720 }, { "epoch": 14.01, "learning_rate": 4.186451408673631e-05, "loss": 0.4818, "step": 1730 }, { "epoch": 14.01, "learning_rate": 4.1785375118708455e-05, "loss": 0.6077, "step": 1740 }, { "epoch": 14.01, "learning_rate": 4.17062361506806e-05, "loss": 0.6641, "step": 1750 }, { "epoch": 14.02, "learning_rate": 4.162709718265274e-05, "loss": 0.4068, "step": 1760 }, { "epoch": 14.02, "learning_rate": 4.154795821462488e-05, "loss": 0.8469, "step": 1770 }, { "epoch": 14.02, "eval_accuracy": 0.6912442396313364, "eval_loss": 0.9379397630691528, "eval_runtime": 304.2958, "eval_samples_per_second": 0.713, "eval_steps_per_second": 0.181, "step": 1770 }, { "epoch": 15.0, "learning_rate": 4.1468819246597024e-05, "loss": 0.4658, "step": 1780 }, { "epoch": 15.0, "learning_rate": 4.138968027856917e-05, "loss": 0.4626, "step": 1790 }, { "epoch": 15.0, "learning_rate": 4.131054131054131e-05, "loss": 0.8248, "step": 1800 }, { "epoch": 15.01, "learning_rate": 4.123140234251345e-05, "loss": 0.883, "step": 1810 }, { "epoch": 15.01, "learning_rate": 4.11522633744856e-05, "loss": 0.4465, "step": 1820 }, { "epoch": 15.01, "learning_rate": 4.107312440645774e-05, "loss": 0.696, "step": 1830 }, { "epoch": 15.01, "learning_rate": 4.0993985438429884e-05, "loss": 0.852, "step": 1840 }, { "epoch": 15.01, "learning_rate": 4.091484647040203e-05, "loss": 0.7966, "step": 1850 }, { "epoch": 15.01, "learning_rate": 4.083570750237417e-05, "loss": 0.6836, "step": 1860 }, { "epoch": 15.01, "learning_rate": 4.075656853434631e-05, "loss": 0.8427, "step": 1870 }, { "epoch": 15.02, "learning_rate": 4.067742956631846e-05, "loss": 0.8492, "step": 1880 }, { "epoch": 15.02, "eval_accuracy": 0.6451612903225806, "eval_loss": 0.900291919708252, "eval_runtime": 312.5208, "eval_samples_per_second": 0.694, "eval_steps_per_second": 0.176, "step": 1888 }, { "epoch": 16.0, "learning_rate": 4.05982905982906e-05, "loss": 0.9333, "step": 1890 }, { "epoch": 16.0, "learning_rate": 4.0519151630262745e-05, "loss": 0.788, "step": 1900 }, { "epoch": 16.0, "learning_rate": 4.044001266223489e-05, "loss": 0.6258, "step": 1910 }, { "epoch": 16.0, "learning_rate": 4.036087369420703e-05, "loss": 0.4548, "step": 1920 }, { "epoch": 16.01, "learning_rate": 4.028173472617917e-05, "loss": 0.7268, "step": 1930 }, { "epoch": 16.01, "learning_rate": 4.020259575815132e-05, "loss": 0.886, "step": 1940 }, { "epoch": 16.01, "learning_rate": 4.012345679012346e-05, "loss": 0.6221, "step": 1950 }, { "epoch": 16.01, "learning_rate": 4.00443178220956e-05, "loss": 0.5802, "step": 1960 }, { "epoch": 16.01, "learning_rate": 3.996517885406775e-05, "loss": 0.6743, "step": 1970 }, { "epoch": 16.01, "learning_rate": 3.988603988603989e-05, "loss": 0.6022, "step": 1980 }, { "epoch": 16.01, "learning_rate": 3.980690091801203e-05, "loss": 0.6691, "step": 1990 }, { "epoch": 16.02, "learning_rate": 3.9727761949984174e-05, "loss": 0.7633, "step": 2000 }, { "epoch": 16.02, "eval_accuracy": 0.7235023041474654, "eval_loss": 0.7601491212844849, "eval_runtime": 299.492, "eval_samples_per_second": 0.725, "eval_steps_per_second": 0.184, "step": 2006 }, { "epoch": 17.0, "learning_rate": 3.9648622981956316e-05, "loss": 0.7329, "step": 2010 }, { "epoch": 17.0, "learning_rate": 3.956948401392846e-05, "loss": 0.4407, "step": 2020 }, { "epoch": 17.0, "learning_rate": 3.94903450459006e-05, "loss": 0.3618, "step": 2030 }, { "epoch": 17.0, "learning_rate": 3.941120607787275e-05, "loss": 0.545, "step": 2040 }, { "epoch": 17.01, "learning_rate": 3.933206710984489e-05, "loss": 0.6322, "step": 2050 }, { "epoch": 17.01, "learning_rate": 3.925292814181703e-05, "loss": 0.7812, "step": 2060 }, { "epoch": 17.01, "learning_rate": 3.9173789173789176e-05, "loss": 0.5198, "step": 2070 }, { "epoch": 17.01, "learning_rate": 3.909465020576132e-05, "loss": 0.563, "step": 2080 }, { "epoch": 17.01, "learning_rate": 3.901551123773346e-05, "loss": 0.7545, "step": 2090 }, { "epoch": 17.01, "learning_rate": 3.893637226970561e-05, "loss": 0.9906, "step": 2100 }, { "epoch": 17.01, "learning_rate": 3.8857233301677745e-05, "loss": 0.807, "step": 2110 }, { "epoch": 17.02, "learning_rate": 3.877809433364989e-05, "loss": 0.6063, "step": 2120 }, { "epoch": 17.02, "eval_accuracy": 0.7788018433179723, "eval_loss": 0.6181166768074036, "eval_runtime": 296.2425, "eval_samples_per_second": 0.733, "eval_steps_per_second": 0.186, "step": 2124 }, { "epoch": 18.0, "learning_rate": 3.8698955365622036e-05, "loss": 0.8029, "step": 2130 }, { "epoch": 18.0, "learning_rate": 3.861981639759418e-05, "loss": 0.7383, "step": 2140 }, { "epoch": 18.0, "learning_rate": 3.854067742956632e-05, "loss": 0.9192, "step": 2150 }, { "epoch": 18.01, "learning_rate": 3.846153846153846e-05, "loss": 0.8748, "step": 2160 }, { "epoch": 18.01, "learning_rate": 3.8382399493510605e-05, "loss": 0.5987, "step": 2170 }, { "epoch": 18.01, "learning_rate": 3.830326052548275e-05, "loss": 0.7523, "step": 2180 }, { "epoch": 18.01, "learning_rate": 3.8224121557454896e-05, "loss": 0.853, "step": 2190 }, { "epoch": 18.01, "learning_rate": 3.814498258942704e-05, "loss": 0.7287, "step": 2200 }, { "epoch": 18.01, "learning_rate": 3.806584362139918e-05, "loss": 0.8661, "step": 2210 }, { "epoch": 18.01, "learning_rate": 3.798670465337132e-05, "loss": 0.6155, "step": 2220 }, { "epoch": 18.02, "learning_rate": 3.7907565685343465e-05, "loss": 0.792, "step": 2230 }, { "epoch": 18.02, "learning_rate": 3.782842671731561e-05, "loss": 0.6436, "step": 2240 }, { "epoch": 18.02, "eval_accuracy": 0.631336405529954, "eval_loss": 0.9444882273674011, "eval_runtime": 299.4212, "eval_samples_per_second": 0.725, "eval_steps_per_second": 0.184, "step": 2242 }, { "epoch": 19.0, "learning_rate": 3.774928774928775e-05, "loss": 0.6049, "step": 2250 }, { "epoch": 19.0, "learning_rate": 3.767014878125989e-05, "loss": 0.6267, "step": 2260 }, { "epoch": 19.0, "learning_rate": 3.7591009813232034e-05, "loss": 0.4215, "step": 2270 }, { "epoch": 19.01, "learning_rate": 3.7511870845204176e-05, "loss": 0.9263, "step": 2280 }, { "epoch": 19.01, "learning_rate": 3.7432731877176325e-05, "loss": 0.4688, "step": 2290 }, { "epoch": 19.01, "learning_rate": 3.735359290914847e-05, "loss": 0.668, "step": 2300 }, { "epoch": 19.01, "learning_rate": 3.727445394112061e-05, "loss": 0.5778, "step": 2310 }, { "epoch": 19.01, "learning_rate": 3.719531497309275e-05, "loss": 0.4659, "step": 2320 }, { "epoch": 19.01, "learning_rate": 3.7116176005064894e-05, "loss": 0.5316, "step": 2330 }, { "epoch": 19.01, "learning_rate": 3.7037037037037037e-05, "loss": 0.6448, "step": 2340 }, { "epoch": 19.02, "learning_rate": 3.6957898069009186e-05, "loss": 0.5125, "step": 2350 }, { "epoch": 19.02, "learning_rate": 3.687875910098133e-05, "loss": 0.8931, "step": 2360 }, { "epoch": 19.02, "eval_accuracy": 0.728110599078341, "eval_loss": 0.8515065908432007, "eval_runtime": 305.633, "eval_samples_per_second": 0.71, "eval_steps_per_second": 0.18, "step": 2360 }, { "epoch": 20.0, "learning_rate": 3.679962013295346e-05, "loss": 0.6621, "step": 2370 }, { "epoch": 20.0, "learning_rate": 3.672048116492561e-05, "loss": 0.5259, "step": 2380 }, { "epoch": 20.0, "learning_rate": 3.6641342196897754e-05, "loss": 0.3653, "step": 2390 }, { "epoch": 20.01, "learning_rate": 3.65622032288699e-05, "loss": 0.4086, "step": 2400 }, { "epoch": 20.01, "learning_rate": 3.6483064260842046e-05, "loss": 0.4137, "step": 2410 }, { "epoch": 20.01, "learning_rate": 3.640392529281418e-05, "loss": 1.0001, "step": 2420 }, { "epoch": 20.01, "learning_rate": 3.6324786324786323e-05, "loss": 0.7551, "step": 2430 }, { "epoch": 20.01, "learning_rate": 3.624564735675847e-05, "loss": 0.7024, "step": 2440 }, { "epoch": 20.01, "learning_rate": 3.6166508388730615e-05, "loss": 0.6067, "step": 2450 }, { "epoch": 20.01, "learning_rate": 3.608736942070276e-05, "loss": 1.0071, "step": 2460 }, { "epoch": 20.02, "learning_rate": 3.60082304526749e-05, "loss": 0.8599, "step": 2470 }, { "epoch": 20.02, "eval_accuracy": 0.6359447004608295, "eval_loss": 1.0786014795303345, "eval_runtime": 314.2163, "eval_samples_per_second": 0.691, "eval_steps_per_second": 0.175, "step": 2478 }, { "epoch": 21.0, "learning_rate": 3.592909148464704e-05, "loss": 0.803, "step": 2480 }, { "epoch": 21.0, "learning_rate": 3.5849952516619184e-05, "loss": 0.4828, "step": 2490 }, { "epoch": 21.0, "learning_rate": 3.5770813548591326e-05, "loss": 0.3992, "step": 2500 }, { "epoch": 21.0, "learning_rate": 3.5691674580563475e-05, "loss": 0.62, "step": 2510 }, { "epoch": 21.01, "learning_rate": 3.561253561253561e-05, "loss": 0.7068, "step": 2520 }, { "epoch": 21.01, "learning_rate": 3.553339664450775e-05, "loss": 0.3764, "step": 2530 }, { "epoch": 21.01, "learning_rate": 3.54542576764799e-05, "loss": 0.7479, "step": 2540 }, { "epoch": 21.01, "learning_rate": 3.5375118708452044e-05, "loss": 0.7272, "step": 2550 }, { "epoch": 21.01, "learning_rate": 3.5295979740424186e-05, "loss": 0.653, "step": 2560 }, { "epoch": 21.01, "learning_rate": 3.521684077239633e-05, "loss": 0.5176, "step": 2570 }, { "epoch": 21.01, "learning_rate": 3.513770180436847e-05, "loss": 0.7597, "step": 2580 }, { "epoch": 21.02, "learning_rate": 3.505856283634061e-05, "loss": 0.5183, "step": 2590 }, { "epoch": 21.02, "eval_accuracy": 0.6866359447004609, "eval_loss": 0.948082447052002, "eval_runtime": 300.3013, "eval_samples_per_second": 0.723, "eval_steps_per_second": 0.183, "step": 2596 }, { "epoch": 22.0, "learning_rate": 3.497942386831276e-05, "loss": 0.7119, "step": 2600 }, { "epoch": 22.0, "learning_rate": 3.4900284900284904e-05, "loss": 0.6381, "step": 2610 }, { "epoch": 22.0, "learning_rate": 3.4821145932257046e-05, "loss": 0.8518, "step": 2620 }, { "epoch": 22.0, "learning_rate": 3.474200696422919e-05, "loss": 0.9462, "step": 2630 }, { "epoch": 22.01, "learning_rate": 3.466286799620133e-05, "loss": 0.6183, "step": 2640 }, { "epoch": 22.01, "learning_rate": 3.458372902817347e-05, "loss": 0.7339, "step": 2650 }, { "epoch": 22.01, "learning_rate": 3.450459006014562e-05, "loss": 0.8466, "step": 2660 }, { "epoch": 22.01, "learning_rate": 3.4425451092117764e-05, "loss": 0.6976, "step": 2670 }, { "epoch": 22.01, "learning_rate": 3.43463121240899e-05, "loss": 0.9784, "step": 2680 }, { "epoch": 22.01, "learning_rate": 3.426717315606205e-05, "loss": 0.4932, "step": 2690 }, { "epoch": 22.01, "learning_rate": 3.418803418803419e-05, "loss": 0.5164, "step": 2700 }, { "epoch": 22.02, "learning_rate": 3.410889522000633e-05, "loss": 0.7982, "step": 2710 }, { "epoch": 22.02, "eval_accuracy": 0.7235023041474654, "eval_loss": 0.8364368677139282, "eval_runtime": 306.1906, "eval_samples_per_second": 0.709, "eval_steps_per_second": 0.18, "step": 2714 }, { "epoch": 23.0, "learning_rate": 3.4029756251978475e-05, "loss": 0.6178, "step": 2720 }, { "epoch": 23.0, "learning_rate": 3.395061728395062e-05, "loss": 0.5603, "step": 2730 }, { "epoch": 23.0, "learning_rate": 3.387147831592276e-05, "loss": 0.6577, "step": 2740 }, { "epoch": 23.01, "learning_rate": 3.37923393478949e-05, "loss": 0.7942, "step": 2750 }, { "epoch": 23.01, "learning_rate": 3.371320037986705e-05, "loss": 0.4468, "step": 2760 }, { "epoch": 23.01, "learning_rate": 3.363406141183919e-05, "loss": 0.6931, "step": 2770 }, { "epoch": 23.01, "learning_rate": 3.355492244381133e-05, "loss": 0.5835, "step": 2780 }, { "epoch": 23.01, "learning_rate": 3.347578347578348e-05, "loss": 0.7099, "step": 2790 }, { "epoch": 23.01, "learning_rate": 3.339664450775562e-05, "loss": 0.6785, "step": 2800 }, { "epoch": 23.01, "learning_rate": 3.331750553972776e-05, "loss": 0.3025, "step": 2810 }, { "epoch": 23.02, "learning_rate": 3.323836657169991e-05, "loss": 0.9246, "step": 2820 }, { "epoch": 23.02, "learning_rate": 3.3159227603672046e-05, "loss": 1.0003, "step": 2830 }, { "epoch": 23.02, "eval_accuracy": 0.7327188940092166, "eval_loss": 0.7810962796211243, "eval_runtime": 298.2745, "eval_samples_per_second": 0.728, "eval_steps_per_second": 0.184, "step": 2832 }, { "epoch": 24.0, "learning_rate": 3.308008863564419e-05, "loss": 1.1583, "step": 2840 }, { "epoch": 24.0, "learning_rate": 3.300094966761634e-05, "loss": 0.4923, "step": 2850 }, { "epoch": 24.0, "learning_rate": 3.292181069958848e-05, "loss": 0.4868, "step": 2860 }, { "epoch": 24.01, "learning_rate": 3.284267173156062e-05, "loss": 0.7352, "step": 2870 }, { "epoch": 24.01, "learning_rate": 3.2763532763532764e-05, "loss": 0.8188, "step": 2880 }, { "epoch": 24.01, "learning_rate": 3.268439379550491e-05, "loss": 0.8439, "step": 2890 }, { "epoch": 24.01, "learning_rate": 3.260525482747705e-05, "loss": 0.821, "step": 2900 }, { "epoch": 24.01, "learning_rate": 3.25261158594492e-05, "loss": 0.6092, "step": 2910 }, { "epoch": 24.01, "learning_rate": 3.244697689142134e-05, "loss": 0.7416, "step": 2920 }, { "epoch": 24.01, "learning_rate": 3.236783792339348e-05, "loss": 0.4177, "step": 2930 }, { "epoch": 24.02, "learning_rate": 3.2288698955365625e-05, "loss": 0.5732, "step": 2940 }, { "epoch": 24.02, "learning_rate": 3.220955998733777e-05, "loss": 0.6666, "step": 2950 }, { "epoch": 24.02, "eval_accuracy": 0.7465437788018433, "eval_loss": 0.7551702857017517, "eval_runtime": 297.83, "eval_samples_per_second": 0.729, "eval_steps_per_second": 0.185, "step": 2950 }, { "epoch": 25.0, "learning_rate": 3.213042101930991e-05, "loss": 0.5516, "step": 2960 }, { "epoch": 25.0, "learning_rate": 3.205128205128206e-05, "loss": 0.5643, "step": 2970 }, { "epoch": 25.0, "learning_rate": 3.1972143083254193e-05, "loss": 0.7, "step": 2980 }, { "epoch": 25.01, "learning_rate": 3.1893004115226336e-05, "loss": 0.4949, "step": 2990 }, { "epoch": 25.01, "learning_rate": 3.181386514719848e-05, "loss": 0.6588, "step": 3000 }, { "epoch": 25.01, "learning_rate": 3.173472617917063e-05, "loss": 0.6374, "step": 3010 }, { "epoch": 25.01, "learning_rate": 3.165558721114277e-05, "loss": 0.4516, "step": 3020 }, { "epoch": 25.01, "learning_rate": 3.157644824311491e-05, "loss": 0.6673, "step": 3030 }, { "epoch": 25.01, "learning_rate": 3.1497309275087054e-05, "loss": 0.6344, "step": 3040 }, { "epoch": 25.01, "learning_rate": 3.1418170307059196e-05, "loss": 0.422, "step": 3050 }, { "epoch": 25.02, "learning_rate": 3.133903133903134e-05, "loss": 0.8527, "step": 3060 }, { "epoch": 25.02, "eval_accuracy": 0.7188940092165899, "eval_loss": 0.8201001286506653, "eval_runtime": 289.214, "eval_samples_per_second": 0.75, "eval_steps_per_second": 0.19, "step": 3068 }, { "epoch": 26.0, "learning_rate": 3.125989237100349e-05, "loss": 0.71, "step": 3070 }, { "epoch": 26.0, "learning_rate": 3.118075340297563e-05, "loss": 0.7812, "step": 3080 }, { "epoch": 26.0, "learning_rate": 3.1101614434947765e-05, "loss": 0.4559, "step": 3090 }, { "epoch": 26.0, "learning_rate": 3.1022475466919914e-05, "loss": 0.5269, "step": 3100 }, { "epoch": 26.01, "learning_rate": 3.0943336498892056e-05, "loss": 0.75, "step": 3110 }, { "epoch": 26.01, "learning_rate": 3.08641975308642e-05, "loss": 0.7118, "step": 3120 }, { "epoch": 26.01, "learning_rate": 3.078505856283635e-05, "loss": 0.7999, "step": 3130 }, { "epoch": 26.01, "learning_rate": 3.070591959480848e-05, "loss": 0.4535, "step": 3140 }, { "epoch": 26.01, "learning_rate": 3.0626780626780625e-05, "loss": 0.7403, "step": 3150 }, { "epoch": 26.01, "learning_rate": 3.0547641658752774e-05, "loss": 0.7067, "step": 3160 }, { "epoch": 26.01, "learning_rate": 3.0468502690724916e-05, "loss": 0.6169, "step": 3170 }, { "epoch": 26.02, "learning_rate": 3.0389363722697055e-05, "loss": 0.4678, "step": 3180 }, { "epoch": 26.02, "eval_accuracy": 0.695852534562212, "eval_loss": 1.0259956121444702, "eval_runtime": 293.3586, "eval_samples_per_second": 0.74, "eval_steps_per_second": 0.187, "step": 3186 }, { "epoch": 27.0, "learning_rate": 3.0310224754669204e-05, "loss": 0.4734, "step": 3190 }, { "epoch": 27.0, "learning_rate": 3.0231085786641343e-05, "loss": 0.6945, "step": 3200 }, { "epoch": 27.0, "learning_rate": 3.0151946818613485e-05, "loss": 0.667, "step": 3210 }, { "epoch": 27.0, "learning_rate": 3.0072807850585634e-05, "loss": 0.6138, "step": 3220 }, { "epoch": 27.01, "learning_rate": 2.9993668882557773e-05, "loss": 0.7627, "step": 3230 }, { "epoch": 27.01, "learning_rate": 2.9914529914529915e-05, "loss": 0.8703, "step": 3240 }, { "epoch": 27.01, "learning_rate": 2.9835390946502057e-05, "loss": 0.6306, "step": 3250 }, { "epoch": 27.01, "learning_rate": 2.9756251978474203e-05, "loss": 0.7293, "step": 3260 }, { "epoch": 27.01, "learning_rate": 2.9677113010446345e-05, "loss": 0.5378, "step": 3270 }, { "epoch": 27.01, "learning_rate": 2.9597974042418487e-05, "loss": 0.9667, "step": 3280 }, { "epoch": 27.01, "learning_rate": 2.9518835074390633e-05, "loss": 0.4905, "step": 3290 }, { "epoch": 27.02, "learning_rate": 2.9439696106362775e-05, "loss": 0.7354, "step": 3300 }, { "epoch": 27.02, "eval_accuracy": 0.6866359447004609, "eval_loss": 0.8520135879516602, "eval_runtime": 253.7054, "eval_samples_per_second": 0.855, "eval_steps_per_second": 0.217, "step": 3304 }, { "epoch": 28.0, "learning_rate": 2.9360557138334914e-05, "loss": 0.4119, "step": 3310 }, { "epoch": 28.0, "learning_rate": 2.9281418170307063e-05, "loss": 0.5532, "step": 3320 }, { "epoch": 28.0, "learning_rate": 2.9202279202279202e-05, "loss": 0.5374, "step": 3330 }, { "epoch": 28.01, "learning_rate": 2.9123140234251344e-05, "loss": 0.4628, "step": 3340 }, { "epoch": 28.01, "learning_rate": 2.9044001266223493e-05, "loss": 0.6509, "step": 3350 }, { "epoch": 28.01, "learning_rate": 2.8964862298195632e-05, "loss": 0.3157, "step": 3360 }, { "epoch": 28.01, "learning_rate": 2.8885723330167774e-05, "loss": 0.5705, "step": 3370 }, { "epoch": 28.01, "learning_rate": 2.880658436213992e-05, "loss": 0.5273, "step": 3380 }, { "epoch": 28.01, "learning_rate": 2.8727445394112062e-05, "loss": 0.4847, "step": 3390 }, { "epoch": 28.01, "learning_rate": 2.8648306426084204e-05, "loss": 0.5194, "step": 3400 }, { "epoch": 28.02, "learning_rate": 2.856916745805635e-05, "loss": 0.391, "step": 3410 }, { "epoch": 28.02, "learning_rate": 2.8490028490028492e-05, "loss": 1.1097, "step": 3420 }, { "epoch": 28.02, "eval_accuracy": 0.7327188940092166, "eval_loss": 0.9238936901092529, "eval_runtime": 306.5306, "eval_samples_per_second": 0.708, "eval_steps_per_second": 0.179, "step": 3422 }, { "epoch": 29.0, "learning_rate": 2.8410889522000634e-05, "loss": 0.7316, "step": 3430 }, { "epoch": 29.0, "learning_rate": 2.833175055397278e-05, "loss": 0.5964, "step": 3440 }, { "epoch": 29.0, "learning_rate": 2.8252611585944922e-05, "loss": 0.4294, "step": 3450 }, { "epoch": 29.01, "learning_rate": 2.817347261791706e-05, "loss": 0.5873, "step": 3460 }, { "epoch": 29.01, "learning_rate": 2.8094333649889203e-05, "loss": 0.5362, "step": 3470 }, { "epoch": 29.01, "learning_rate": 2.8015194681861352e-05, "loss": 0.7654, "step": 3480 }, { "epoch": 29.01, "learning_rate": 2.793605571383349e-05, "loss": 0.6749, "step": 3490 }, { "epoch": 29.01, "learning_rate": 2.7856916745805633e-05, "loss": 0.392, "step": 3500 }, { "epoch": 29.01, "learning_rate": 2.777777777777778e-05, "loss": 0.9069, "step": 3510 }, { "epoch": 29.01, "learning_rate": 2.769863880974992e-05, "loss": 0.4994, "step": 3520 }, { "epoch": 29.02, "learning_rate": 2.7619499841722064e-05, "loss": 0.7977, "step": 3530 }, { "epoch": 29.02, "learning_rate": 2.754036087369421e-05, "loss": 0.6264, "step": 3540 }, { "epoch": 29.02, "eval_accuracy": 0.7557603686635944, "eval_loss": 0.6894146800041199, "eval_runtime": 290.6314, "eval_samples_per_second": 0.747, "eval_steps_per_second": 0.189, "step": 3540 }, { "epoch": 30.0, "learning_rate": 2.746122190566635e-05, "loss": 1.0999, "step": 3550 }, { "epoch": 30.0, "learning_rate": 2.7382082937638494e-05, "loss": 0.5372, "step": 3560 }, { "epoch": 30.0, "learning_rate": 2.730294396961064e-05, "loss": 0.595, "step": 3570 }, { "epoch": 30.01, "learning_rate": 2.722380500158278e-05, "loss": 0.9679, "step": 3580 }, { "epoch": 30.01, "learning_rate": 2.714466603355492e-05, "loss": 0.9635, "step": 3590 }, { "epoch": 30.01, "learning_rate": 2.706552706552707e-05, "loss": 0.3871, "step": 3600 }, { "epoch": 30.01, "learning_rate": 2.698638809749921e-05, "loss": 0.4446, "step": 3610 }, { "epoch": 30.01, "learning_rate": 2.690724912947135e-05, "loss": 0.587, "step": 3620 }, { "epoch": 30.01, "learning_rate": 2.68281101614435e-05, "loss": 0.5359, "step": 3630 }, { "epoch": 30.01, "learning_rate": 2.6748971193415638e-05, "loss": 0.7691, "step": 3640 }, { "epoch": 30.02, "learning_rate": 2.666983222538778e-05, "loss": 0.3348, "step": 3650 }, { "epoch": 30.02, "eval_accuracy": 0.8064516129032258, "eval_loss": 0.6229776740074158, "eval_runtime": 293.7254, "eval_samples_per_second": 0.739, "eval_steps_per_second": 0.187, "step": 3658 }, { "epoch": 31.0, "learning_rate": 2.6590693257359926e-05, "loss": 0.9398, "step": 3660 }, { "epoch": 31.0, "learning_rate": 2.651155428933207e-05, "loss": 0.6431, "step": 3670 }, { "epoch": 31.0, "learning_rate": 2.643241532130421e-05, "loss": 0.3879, "step": 3680 }, { "epoch": 31.0, "learning_rate": 2.6353276353276356e-05, "loss": 0.4384, "step": 3690 }, { "epoch": 31.01, "learning_rate": 2.62741373852485e-05, "loss": 0.6234, "step": 3700 }, { "epoch": 31.01, "learning_rate": 2.619499841722064e-05, "loss": 0.7331, "step": 3710 }, { "epoch": 31.01, "learning_rate": 2.611585944919278e-05, "loss": 0.3598, "step": 3720 }, { "epoch": 31.01, "learning_rate": 2.603672048116493e-05, "loss": 0.6426, "step": 3730 }, { "epoch": 31.01, "learning_rate": 2.595758151313707e-05, "loss": 0.5889, "step": 3740 }, { "epoch": 31.01, "learning_rate": 2.587844254510921e-05, "loss": 0.6661, "step": 3750 }, { "epoch": 31.01, "learning_rate": 2.579930357708136e-05, "loss": 0.672, "step": 3760 }, { "epoch": 31.02, "learning_rate": 2.5720164609053497e-05, "loss": 0.5548, "step": 3770 }, { "epoch": 31.02, "eval_accuracy": 0.8202764976958525, "eval_loss": 0.6430536508560181, "eval_runtime": 296.7673, "eval_samples_per_second": 0.731, "eval_steps_per_second": 0.185, "step": 3776 }, { "epoch": 32.0, "learning_rate": 2.564102564102564e-05, "loss": 0.6612, "step": 3780 }, { "epoch": 32.0, "learning_rate": 2.5561886672997785e-05, "loss": 0.7013, "step": 3790 }, { "epoch": 32.0, "learning_rate": 2.5482747704969927e-05, "loss": 0.7166, "step": 3800 }, { "epoch": 32.0, "learning_rate": 2.540360873694207e-05, "loss": 0.8458, "step": 3810 }, { "epoch": 32.01, "learning_rate": 2.5324469768914215e-05, "loss": 0.7025, "step": 3820 }, { "epoch": 32.01, "learning_rate": 2.5245330800886358e-05, "loss": 0.4634, "step": 3830 }, { "epoch": 32.01, "learning_rate": 2.51661918328585e-05, "loss": 0.7509, "step": 3840 }, { "epoch": 32.01, "learning_rate": 2.5087052864830645e-05, "loss": 0.5975, "step": 3850 }, { "epoch": 32.01, "learning_rate": 2.5007913896802788e-05, "loss": 0.6689, "step": 3860 }, { "epoch": 32.01, "learning_rate": 2.492877492877493e-05, "loss": 0.4826, "step": 3870 }, { "epoch": 32.01, "learning_rate": 2.4849635960747072e-05, "loss": 0.4976, "step": 3880 }, { "epoch": 32.02, "learning_rate": 2.4770496992719218e-05, "loss": 0.4242, "step": 3890 }, { "epoch": 32.02, "eval_accuracy": 0.7050691244239631, "eval_loss": 0.8081349730491638, "eval_runtime": 298.374, "eval_samples_per_second": 0.727, "eval_steps_per_second": 0.184, "step": 3894 }, { "epoch": 33.0, "learning_rate": 2.4691358024691357e-05, "loss": 0.3113, "step": 3900 }, { "epoch": 33.0, "learning_rate": 2.4612219056663502e-05, "loss": 0.6925, "step": 3910 }, { "epoch": 33.0, "learning_rate": 2.4533080088635644e-05, "loss": 0.6335, "step": 3920 }, { "epoch": 33.01, "learning_rate": 2.4453941120607787e-05, "loss": 0.5331, "step": 3930 }, { "epoch": 33.01, "learning_rate": 2.4374802152579932e-05, "loss": 0.6092, "step": 3940 }, { "epoch": 33.01, "learning_rate": 2.4295663184552074e-05, "loss": 0.534, "step": 3950 }, { "epoch": 33.01, "learning_rate": 2.4216524216524217e-05, "loss": 0.4707, "step": 3960 }, { "epoch": 33.01, "learning_rate": 2.4137385248496362e-05, "loss": 0.5926, "step": 3970 }, { "epoch": 33.01, "learning_rate": 2.4058246280468505e-05, "loss": 0.2674, "step": 3980 }, { "epoch": 33.01, "learning_rate": 2.3979107312440647e-05, "loss": 0.9024, "step": 3990 }, { "epoch": 33.02, "learning_rate": 2.389996834441279e-05, "loss": 0.3402, "step": 4000 }, { "epoch": 33.02, "learning_rate": 2.3820829376384935e-05, "loss": 0.5805, "step": 4010 }, { "epoch": 33.02, "eval_accuracy": 0.8202764976958525, "eval_loss": 0.5598491430282593, "eval_runtime": 287.693, "eval_samples_per_second": 0.754, "eval_steps_per_second": 0.191, "step": 4012 }, { "epoch": 34.0, "learning_rate": 2.3741690408357077e-05, "loss": 0.6835, "step": 4020 }, { "epoch": 34.0, "learning_rate": 2.366255144032922e-05, "loss": 0.6466, "step": 4030 }, { "epoch": 34.0, "learning_rate": 2.358341247230136e-05, "loss": 0.3698, "step": 4040 }, { "epoch": 34.01, "learning_rate": 2.3504273504273504e-05, "loss": 0.4723, "step": 4050 }, { "epoch": 34.01, "learning_rate": 2.342513453624565e-05, "loss": 0.7078, "step": 4060 }, { "epoch": 34.01, "learning_rate": 2.334599556821779e-05, "loss": 0.6876, "step": 4070 }, { "epoch": 34.01, "learning_rate": 2.3266856600189934e-05, "loss": 0.4859, "step": 4080 }, { "epoch": 34.01, "learning_rate": 2.318771763216208e-05, "loss": 0.5394, "step": 4090 }, { "epoch": 34.01, "learning_rate": 2.310857866413422e-05, "loss": 0.4636, "step": 4100 }, { "epoch": 34.01, "learning_rate": 2.3029439696106364e-05, "loss": 0.5758, "step": 4110 }, { "epoch": 34.02, "learning_rate": 2.295030072807851e-05, "loss": 0.4957, "step": 4120 }, { "epoch": 34.02, "learning_rate": 2.2871161760050648e-05, "loss": 0.7064, "step": 4130 }, { "epoch": 34.02, "eval_accuracy": 0.7926267281105991, "eval_loss": 0.7340723276138306, "eval_runtime": 254.6894, "eval_samples_per_second": 0.852, "eval_steps_per_second": 0.216, "step": 4130 }, { "epoch": 35.0, "learning_rate": 2.2792022792022794e-05, "loss": 0.4694, "step": 4140 }, { "epoch": 35.0, "learning_rate": 2.2712883823994936e-05, "loss": 0.6956, "step": 4150 }, { "epoch": 35.0, "learning_rate": 2.2633744855967078e-05, "loss": 0.4924, "step": 4160 }, { "epoch": 35.01, "learning_rate": 2.2554605887939224e-05, "loss": 0.7678, "step": 4170 }, { "epoch": 35.01, "learning_rate": 2.2475466919911363e-05, "loss": 0.6094, "step": 4180 }, { "epoch": 35.01, "learning_rate": 2.239632795188351e-05, "loss": 0.3326, "step": 4190 }, { "epoch": 35.01, "learning_rate": 2.2317188983855654e-05, "loss": 0.623, "step": 4200 }, { "epoch": 35.01, "learning_rate": 2.2238050015827793e-05, "loss": 0.6507, "step": 4210 }, { "epoch": 35.01, "learning_rate": 2.215891104779994e-05, "loss": 0.5673, "step": 4220 }, { "epoch": 35.01, "learning_rate": 2.207977207977208e-05, "loss": 0.9691, "step": 4230 }, { "epoch": 35.02, "learning_rate": 2.2000633111744223e-05, "loss": 0.2534, "step": 4240 }, { "epoch": 35.02, "eval_accuracy": 0.783410138248848, "eval_loss": 0.6685347557067871, "eval_runtime": 253.8074, "eval_samples_per_second": 0.855, "eval_steps_per_second": 0.217, "step": 4248 }, { "epoch": 36.0, "learning_rate": 2.192149414371637e-05, "loss": 0.5305, "step": 4250 }, { "epoch": 36.0, "learning_rate": 2.184235517568851e-05, "loss": 0.3181, "step": 4260 }, { "epoch": 36.0, "learning_rate": 2.1763216207660653e-05, "loss": 0.2655, "step": 4270 }, { "epoch": 36.0, "learning_rate": 2.1684077239632795e-05, "loss": 0.546, "step": 4280 }, { "epoch": 36.01, "learning_rate": 2.1604938271604937e-05, "loss": 0.5166, "step": 4290 }, { "epoch": 36.01, "learning_rate": 2.1525799303577083e-05, "loss": 0.612, "step": 4300 }, { "epoch": 36.01, "learning_rate": 2.1446660335549225e-05, "loss": 1.0337, "step": 4310 }, { "epoch": 36.01, "learning_rate": 2.1367521367521368e-05, "loss": 0.4693, "step": 4320 }, { "epoch": 36.01, "learning_rate": 2.1288382399493513e-05, "loss": 0.535, "step": 4330 }, { "epoch": 36.01, "learning_rate": 2.1209243431465655e-05, "loss": 0.2098, "step": 4340 }, { "epoch": 36.01, "learning_rate": 2.1130104463437798e-05, "loss": 0.5005, "step": 4350 }, { "epoch": 36.02, "learning_rate": 2.105096549540994e-05, "loss": 0.7578, "step": 4360 }, { "epoch": 36.02, "eval_accuracy": 0.7603686635944701, "eval_loss": 0.7591729760169983, "eval_runtime": 256.6972, "eval_samples_per_second": 0.845, "eval_steps_per_second": 0.214, "step": 4366 }, { "epoch": 37.0, "learning_rate": 2.0971826527382085e-05, "loss": 0.7535, "step": 4370 }, { "epoch": 37.0, "learning_rate": 2.0892687559354228e-05, "loss": 0.5906, "step": 4380 }, { "epoch": 37.0, "learning_rate": 2.081354859132637e-05, "loss": 0.5422, "step": 4390 }, { "epoch": 37.0, "learning_rate": 2.0734409623298512e-05, "loss": 0.4581, "step": 4400 }, { "epoch": 37.01, "learning_rate": 2.0655270655270654e-05, "loss": 0.4551, "step": 4410 }, { "epoch": 37.01, "learning_rate": 2.05761316872428e-05, "loss": 0.528, "step": 4420 }, { "epoch": 37.01, "learning_rate": 2.0496992719214942e-05, "loss": 0.2991, "step": 4430 }, { "epoch": 37.01, "learning_rate": 2.0417853751187084e-05, "loss": 0.8417, "step": 4440 }, { "epoch": 37.01, "learning_rate": 2.033871478315923e-05, "loss": 0.924, "step": 4450 }, { "epoch": 37.01, "learning_rate": 2.0259575815131372e-05, "loss": 0.5173, "step": 4460 }, { "epoch": 37.01, "learning_rate": 2.0180436847103515e-05, "loss": 0.3303, "step": 4470 }, { "epoch": 37.02, "learning_rate": 2.010129787907566e-05, "loss": 0.5822, "step": 4480 }, { "epoch": 37.02, "eval_accuracy": 0.728110599078341, "eval_loss": 0.9471691250801086, "eval_runtime": 259.2305, "eval_samples_per_second": 0.837, "eval_steps_per_second": 0.212, "step": 4484 }, { "epoch": 38.0, "learning_rate": 2.00221589110478e-05, "loss": 0.3202, "step": 4490 }, { "epoch": 38.0, "learning_rate": 1.9943019943019945e-05, "loss": 0.4554, "step": 4500 }, { "epoch": 38.0, "learning_rate": 1.9863880974992087e-05, "loss": 0.5358, "step": 4510 }, { "epoch": 38.01, "learning_rate": 1.978474200696423e-05, "loss": 0.3341, "step": 4520 }, { "epoch": 38.01, "learning_rate": 1.9705603038936375e-05, "loss": 1.1299, "step": 4530 }, { "epoch": 38.01, "learning_rate": 1.9626464070908514e-05, "loss": 0.4959, "step": 4540 }, { "epoch": 38.01, "learning_rate": 1.954732510288066e-05, "loss": 0.2519, "step": 4550 }, { "epoch": 38.01, "learning_rate": 1.9468186134852805e-05, "loss": 0.9426, "step": 4560 }, { "epoch": 38.01, "learning_rate": 1.9389047166824944e-05, "loss": 0.3135, "step": 4570 }, { "epoch": 38.01, "learning_rate": 1.930990819879709e-05, "loss": 0.528, "step": 4580 }, { "epoch": 38.02, "learning_rate": 1.923076923076923e-05, "loss": 0.8759, "step": 4590 }, { "epoch": 38.02, "learning_rate": 1.9151630262741374e-05, "loss": 0.2939, "step": 4600 }, { "epoch": 38.02, "eval_accuracy": 0.728110599078341, "eval_loss": 0.8887839317321777, "eval_runtime": 260.522, "eval_samples_per_second": 0.833, "eval_steps_per_second": 0.211, "step": 4602 }, { "epoch": 39.0, "learning_rate": 1.907249129471352e-05, "loss": 0.7159, "step": 4610 }, { "epoch": 39.0, "learning_rate": 1.899335232668566e-05, "loss": 0.7495, "step": 4620 }, { "epoch": 39.0, "learning_rate": 1.8914213358657804e-05, "loss": 0.6435, "step": 4630 }, { "epoch": 39.01, "learning_rate": 1.8835074390629946e-05, "loss": 0.5865, "step": 4640 }, { "epoch": 39.01, "learning_rate": 1.8755935422602088e-05, "loss": 0.5622, "step": 4650 }, { "epoch": 39.01, "learning_rate": 1.8676796454574234e-05, "loss": 0.4081, "step": 4660 }, { "epoch": 39.01, "learning_rate": 1.8597657486546376e-05, "loss": 0.5133, "step": 4670 }, { "epoch": 39.01, "learning_rate": 1.8518518518518518e-05, "loss": 0.5643, "step": 4680 }, { "epoch": 39.01, "learning_rate": 1.8439379550490664e-05, "loss": 0.5888, "step": 4690 }, { "epoch": 39.01, "learning_rate": 1.8360240582462806e-05, "loss": 0.7294, "step": 4700 }, { "epoch": 39.02, "learning_rate": 1.828110161443495e-05, "loss": 0.4845, "step": 4710 }, { "epoch": 39.02, "learning_rate": 1.820196264640709e-05, "loss": 0.4795, "step": 4720 }, { "epoch": 39.02, "eval_accuracy": 0.663594470046083, "eval_loss": 1.0767979621887207, "eval_runtime": 260.495, "eval_samples_per_second": 0.833, "eval_steps_per_second": 0.211, "step": 4720 }, { "epoch": 40.0, "learning_rate": 1.8122823678379236e-05, "loss": 0.3626, "step": 4730 }, { "epoch": 40.0, "learning_rate": 1.804368471035138e-05, "loss": 0.5294, "step": 4740 }, { "epoch": 40.0, "learning_rate": 1.796454574232352e-05, "loss": 0.8263, "step": 4750 }, { "epoch": 40.01, "learning_rate": 1.7885406774295663e-05, "loss": 0.6514, "step": 4760 }, { "epoch": 40.01, "learning_rate": 1.7806267806267805e-05, "loss": 0.5644, "step": 4770 }, { "epoch": 40.01, "learning_rate": 1.772712883823995e-05, "loss": 0.5152, "step": 4780 }, { "epoch": 40.01, "learning_rate": 1.7647989870212093e-05, "loss": 0.5133, "step": 4790 }, { "epoch": 40.01, "learning_rate": 1.7568850902184235e-05, "loss": 0.6261, "step": 4800 }, { "epoch": 40.01, "learning_rate": 1.748971193415638e-05, "loss": 0.8368, "step": 4810 }, { "epoch": 40.01, "learning_rate": 1.7410572966128523e-05, "loss": 0.5701, "step": 4820 }, { "epoch": 40.02, "learning_rate": 1.7331433998100665e-05, "loss": 0.4038, "step": 4830 }, { "epoch": 40.02, "eval_accuracy": 0.8064516129032258, "eval_loss": 0.6451985836029053, "eval_runtime": 261.7369, "eval_samples_per_second": 0.829, "eval_steps_per_second": 0.21, "step": 4838 }, { "epoch": 41.0, "learning_rate": 1.725229503007281e-05, "loss": 0.4527, "step": 4840 }, { "epoch": 41.0, "learning_rate": 1.717315606204495e-05, "loss": 0.9572, "step": 4850 }, { "epoch": 41.0, "learning_rate": 1.7094017094017095e-05, "loss": 0.5401, "step": 4860 }, { "epoch": 41.0, "learning_rate": 1.7014878125989238e-05, "loss": 0.3303, "step": 4870 }, { "epoch": 41.01, "learning_rate": 1.693573915796138e-05, "loss": 0.6231, "step": 4880 }, { "epoch": 41.01, "learning_rate": 1.6856600189933525e-05, "loss": 0.4519, "step": 4890 }, { "epoch": 41.01, "learning_rate": 1.6777461221905664e-05, "loss": 0.6937, "step": 4900 }, { "epoch": 41.01, "learning_rate": 1.669832225387781e-05, "loss": 0.4248, "step": 4910 }, { "epoch": 41.01, "learning_rate": 1.6619183285849956e-05, "loss": 0.6655, "step": 4920 }, { "epoch": 41.01, "learning_rate": 1.6540044317822094e-05, "loss": 0.5651, "step": 4930 }, { "epoch": 41.01, "learning_rate": 1.646090534979424e-05, "loss": 0.6075, "step": 4940 }, { "epoch": 41.02, "learning_rate": 1.6381766381766382e-05, "loss": 0.8347, "step": 4950 }, { "epoch": 41.02, "eval_accuracy": 0.7926267281105991, "eval_loss": 0.7040404677391052, "eval_runtime": 257.2136, "eval_samples_per_second": 0.844, "eval_steps_per_second": 0.214, "step": 4956 }, { "epoch": 42.0, "learning_rate": 1.6302627413738524e-05, "loss": 0.7086, "step": 4960 }, { "epoch": 42.0, "learning_rate": 1.622348844571067e-05, "loss": 0.6848, "step": 4970 }, { "epoch": 42.0, "learning_rate": 1.6144349477682812e-05, "loss": 0.718, "step": 4980 }, { "epoch": 42.0, "learning_rate": 1.6065210509654955e-05, "loss": 0.3821, "step": 4990 }, { "epoch": 42.01, "learning_rate": 1.5986071541627097e-05, "loss": 0.488, "step": 5000 }, { "epoch": 42.01, "learning_rate": 1.590693257359924e-05, "loss": 0.3441, "step": 5010 }, { "epoch": 42.01, "learning_rate": 1.5827793605571385e-05, "loss": 0.5323, "step": 5020 }, { "epoch": 42.01, "learning_rate": 1.5748654637543527e-05, "loss": 0.3723, "step": 5030 }, { "epoch": 42.01, "learning_rate": 1.566951566951567e-05, "loss": 0.5536, "step": 5040 }, { "epoch": 42.01, "learning_rate": 1.5590376701487815e-05, "loss": 0.2562, "step": 5050 }, { "epoch": 42.01, "learning_rate": 1.5511237733459957e-05, "loss": 0.4913, "step": 5060 }, { "epoch": 42.02, "learning_rate": 1.54320987654321e-05, "loss": 0.4113, "step": 5070 }, { "epoch": 42.02, "eval_accuracy": 0.7373271889400922, "eval_loss": 0.8011646866798401, "eval_runtime": 252.0655, "eval_samples_per_second": 0.861, "eval_steps_per_second": 0.218, "step": 5074 }, { "epoch": 43.0, "learning_rate": 1.535295979740424e-05, "loss": 0.1942, "step": 5080 }, { "epoch": 43.0, "learning_rate": 1.5273820829376387e-05, "loss": 0.6847, "step": 5090 }, { "epoch": 43.0, "learning_rate": 1.5194681861348528e-05, "loss": 0.5127, "step": 5100 }, { "epoch": 43.01, "learning_rate": 1.5115542893320671e-05, "loss": 0.18, "step": 5110 }, { "epoch": 43.01, "learning_rate": 1.5036403925292817e-05, "loss": 0.6956, "step": 5120 }, { "epoch": 43.01, "learning_rate": 1.4957264957264958e-05, "loss": 0.7358, "step": 5130 }, { "epoch": 43.01, "learning_rate": 1.4878125989237102e-05, "loss": 0.7034, "step": 5140 }, { "epoch": 43.01, "learning_rate": 1.4798987021209244e-05, "loss": 0.5671, "step": 5150 }, { "epoch": 43.01, "learning_rate": 1.4719848053181388e-05, "loss": 0.276, "step": 5160 }, { "epoch": 43.01, "learning_rate": 1.4640709085153532e-05, "loss": 0.5131, "step": 5170 }, { "epoch": 43.02, "learning_rate": 1.4561570117125672e-05, "loss": 0.6712, "step": 5180 }, { "epoch": 43.02, "learning_rate": 1.4482431149097816e-05, "loss": 0.3681, "step": 5190 }, { "epoch": 43.02, "eval_accuracy": 0.7880184331797235, "eval_loss": 0.762200117111206, "eval_runtime": 254.5668, "eval_samples_per_second": 0.852, "eval_steps_per_second": 0.216, "step": 5192 }, { "epoch": 44.0, "learning_rate": 1.440329218106996e-05, "loss": 0.4388, "step": 5200 }, { "epoch": 44.0, "learning_rate": 1.4324153213042102e-05, "loss": 0.4731, "step": 5210 }, { "epoch": 44.0, "learning_rate": 1.4245014245014246e-05, "loss": 0.4798, "step": 5220 }, { "epoch": 44.01, "learning_rate": 1.416587527698639e-05, "loss": 0.3783, "step": 5230 }, { "epoch": 44.01, "learning_rate": 1.408673630895853e-05, "loss": 0.2336, "step": 5240 }, { "epoch": 44.01, "learning_rate": 1.4007597340930676e-05, "loss": 0.3797, "step": 5250 }, { "epoch": 44.01, "learning_rate": 1.3928458372902817e-05, "loss": 0.4423, "step": 5260 }, { "epoch": 44.01, "learning_rate": 1.384931940487496e-05, "loss": 0.5598, "step": 5270 }, { "epoch": 44.01, "learning_rate": 1.3770180436847105e-05, "loss": 0.5773, "step": 5280 }, { "epoch": 44.01, "learning_rate": 1.3691041468819247e-05, "loss": 0.3113, "step": 5290 }, { "epoch": 44.02, "learning_rate": 1.361190250079139e-05, "loss": 0.2841, "step": 5300 }, { "epoch": 44.02, "learning_rate": 1.3532763532763535e-05, "loss": 1.0092, "step": 5310 }, { "epoch": 44.02, "eval_accuracy": 0.7880184331797235, "eval_loss": 0.7931644916534424, "eval_runtime": 259.7621, "eval_samples_per_second": 0.835, "eval_steps_per_second": 0.212, "step": 5310 }, { "epoch": 45.0, "learning_rate": 1.3453624564735675e-05, "loss": 0.6327, "step": 5320 }, { "epoch": 45.0, "learning_rate": 1.3374485596707819e-05, "loss": 1.0639, "step": 5330 }, { "epoch": 45.0, "learning_rate": 1.3295346628679963e-05, "loss": 0.2824, "step": 5340 }, { "epoch": 45.01, "learning_rate": 1.3216207660652105e-05, "loss": 0.3033, "step": 5350 }, { "epoch": 45.01, "learning_rate": 1.313706869262425e-05, "loss": 0.5596, "step": 5360 }, { "epoch": 45.01, "learning_rate": 1.305792972459639e-05, "loss": 0.4285, "step": 5370 }, { "epoch": 45.01, "learning_rate": 1.2978790756568535e-05, "loss": 0.7668, "step": 5380 }, { "epoch": 45.01, "learning_rate": 1.289965178854068e-05, "loss": 0.6117, "step": 5390 }, { "epoch": 45.01, "learning_rate": 1.282051282051282e-05, "loss": 0.5597, "step": 5400 }, { "epoch": 45.01, "learning_rate": 1.2741373852484964e-05, "loss": 0.2782, "step": 5410 }, { "epoch": 45.02, "learning_rate": 1.2662234884457108e-05, "loss": 0.321, "step": 5420 }, { "epoch": 45.02, "eval_accuracy": 0.7373271889400922, "eval_loss": 0.9068748354911804, "eval_runtime": 259.5409, "eval_samples_per_second": 0.836, "eval_steps_per_second": 0.212, "step": 5428 }, { "epoch": 46.0, "learning_rate": 1.258309591642925e-05, "loss": 0.5204, "step": 5430 }, { "epoch": 46.0, "learning_rate": 1.2503956948401394e-05, "loss": 0.5057, "step": 5440 }, { "epoch": 46.0, "learning_rate": 1.2424817980373536e-05, "loss": 0.5511, "step": 5450 }, { "epoch": 46.0, "learning_rate": 1.2345679012345678e-05, "loss": 0.6262, "step": 5460 }, { "epoch": 46.01, "learning_rate": 1.2266540044317822e-05, "loss": 0.3925, "step": 5470 }, { "epoch": 46.01, "learning_rate": 1.2187401076289966e-05, "loss": 0.5744, "step": 5480 }, { "epoch": 46.01, "learning_rate": 1.2108262108262108e-05, "loss": 0.4216, "step": 5490 }, { "epoch": 46.01, "learning_rate": 1.2029123140234252e-05, "loss": 0.6228, "step": 5500 }, { "epoch": 46.01, "learning_rate": 1.1949984172206395e-05, "loss": 0.3394, "step": 5510 }, { "epoch": 46.01, "learning_rate": 1.1870845204178538e-05, "loss": 0.7777, "step": 5520 }, { "epoch": 46.01, "learning_rate": 1.179170623615068e-05, "loss": 0.3025, "step": 5530 }, { "epoch": 46.02, "learning_rate": 1.1712567268122825e-05, "loss": 0.399, "step": 5540 }, { "epoch": 46.02, "eval_accuracy": 0.8110599078341014, "eval_loss": 0.6439275741577148, "eval_runtime": 263.8811, "eval_samples_per_second": 0.822, "eval_steps_per_second": 0.208, "step": 5546 }, { "epoch": 47.0, "learning_rate": 1.1633428300094967e-05, "loss": 0.6139, "step": 5550 }, { "epoch": 47.0, "learning_rate": 1.155428933206711e-05, "loss": 0.4756, "step": 5560 }, { "epoch": 47.0, "learning_rate": 1.1475150364039255e-05, "loss": 0.4553, "step": 5570 }, { "epoch": 47.0, "learning_rate": 1.1396011396011397e-05, "loss": 0.4806, "step": 5580 }, { "epoch": 47.01, "learning_rate": 1.1316872427983539e-05, "loss": 0.6994, "step": 5590 }, { "epoch": 47.01, "learning_rate": 1.1237733459955681e-05, "loss": 0.6582, "step": 5600 }, { "epoch": 47.01, "learning_rate": 1.1158594491927827e-05, "loss": 0.241, "step": 5610 }, { "epoch": 47.01, "learning_rate": 1.107945552389997e-05, "loss": 0.6311, "step": 5620 }, { "epoch": 47.01, "learning_rate": 1.1000316555872111e-05, "loss": 0.8412, "step": 5630 }, { "epoch": 47.01, "learning_rate": 1.0921177587844255e-05, "loss": 0.2699, "step": 5640 }, { "epoch": 47.01, "learning_rate": 1.0842038619816398e-05, "loss": 0.3901, "step": 5650 }, { "epoch": 47.02, "learning_rate": 1.0762899651788542e-05, "loss": 0.3699, "step": 5660 }, { "epoch": 47.02, "eval_accuracy": 0.7695852534562212, "eval_loss": 0.7740164399147034, "eval_runtime": 266.667, "eval_samples_per_second": 0.814, "eval_steps_per_second": 0.206, "step": 5664 }, { "epoch": 48.0, "learning_rate": 1.0683760683760684e-05, "loss": 0.8451, "step": 5670 }, { "epoch": 48.0, "learning_rate": 1.0604621715732828e-05, "loss": 0.5939, "step": 5680 }, { "epoch": 48.0, "learning_rate": 1.052548274770497e-05, "loss": 0.7695, "step": 5690 }, { "epoch": 48.01, "learning_rate": 1.0446343779677114e-05, "loss": 0.5514, "step": 5700 }, { "epoch": 48.01, "learning_rate": 1.0367204811649256e-05, "loss": 0.4196, "step": 5710 }, { "epoch": 48.01, "learning_rate": 1.02880658436214e-05, "loss": 0.2501, "step": 5720 }, { "epoch": 48.01, "learning_rate": 1.0208926875593542e-05, "loss": 0.6282, "step": 5730 }, { "epoch": 48.01, "learning_rate": 1.0129787907565686e-05, "loss": 0.5634, "step": 5740 }, { "epoch": 48.01, "learning_rate": 1.005064893953783e-05, "loss": 0.3623, "step": 5750 }, { "epoch": 48.01, "learning_rate": 9.971509971509972e-06, "loss": 0.4048, "step": 5760 }, { "epoch": 48.02, "learning_rate": 9.892371003482115e-06, "loss": 0.5855, "step": 5770 }, { "epoch": 48.02, "learning_rate": 9.813232035454257e-06, "loss": 0.4297, "step": 5780 }, { "epoch": 48.02, "eval_accuracy": 0.8248847926267281, "eval_loss": 0.6810868978500366, "eval_runtime": 261.851, "eval_samples_per_second": 0.829, "eval_steps_per_second": 0.21, "step": 5782 }, { "epoch": 49.0, "learning_rate": 9.734093067426402e-06, "loss": 0.3992, "step": 5790 }, { "epoch": 49.0, "learning_rate": 9.654954099398545e-06, "loss": 1.0214, "step": 5800 }, { "epoch": 49.0, "learning_rate": 9.575815131370687e-06, "loss": 0.4853, "step": 5810 }, { "epoch": 49.01, "learning_rate": 9.49667616334283e-06, "loss": 0.2907, "step": 5820 }, { "epoch": 49.01, "learning_rate": 9.417537195314973e-06, "loss": 0.4338, "step": 5830 }, { "epoch": 49.01, "learning_rate": 9.338398227287117e-06, "loss": 0.3648, "step": 5840 }, { "epoch": 49.01, "learning_rate": 9.259259259259259e-06, "loss": 0.8034, "step": 5850 }, { "epoch": 49.01, "learning_rate": 9.180120291231403e-06, "loss": 0.7394, "step": 5860 }, { "epoch": 49.01, "learning_rate": 9.100981323203545e-06, "loss": 0.494, "step": 5870 }, { "epoch": 49.01, "learning_rate": 9.02184235517569e-06, "loss": 0.9404, "step": 5880 }, { "epoch": 49.02, "learning_rate": 8.942703387147831e-06, "loss": 0.4178, "step": 5890 }, { "epoch": 49.02, "learning_rate": 8.863564419119975e-06, "loss": 0.2783, "step": 5900 }, { "epoch": 49.02, "eval_accuracy": 0.8525345622119815, "eval_loss": 0.586846113204956, "eval_runtime": 260.0845, "eval_samples_per_second": 0.834, "eval_steps_per_second": 0.211, "step": 5900 }, { "epoch": 50.0, "learning_rate": 8.784425451092118e-06, "loss": 0.4036, "step": 5910 }, { "epoch": 50.0, "learning_rate": 8.705286483064262e-06, "loss": 0.6644, "step": 5920 }, { "epoch": 50.0, "learning_rate": 8.626147515036405e-06, "loss": 0.2605, "step": 5930 }, { "epoch": 50.01, "learning_rate": 8.547008547008548e-06, "loss": 0.6677, "step": 5940 }, { "epoch": 50.01, "learning_rate": 8.46786957898069e-06, "loss": 0.4203, "step": 5950 }, { "epoch": 50.01, "learning_rate": 8.388730610952832e-06, "loss": 0.286, "step": 5960 }, { "epoch": 50.01, "learning_rate": 8.309591642924978e-06, "loss": 0.5586, "step": 5970 }, { "epoch": 50.01, "learning_rate": 8.23045267489712e-06, "loss": 0.5642, "step": 5980 }, { "epoch": 50.01, "learning_rate": 8.151313706869262e-06, "loss": 0.583, "step": 5990 }, { "epoch": 50.01, "learning_rate": 8.072174738841406e-06, "loss": 0.2579, "step": 6000 }, { "epoch": 50.02, "learning_rate": 7.993035770813548e-06, "loss": 0.4946, "step": 6010 }, { "epoch": 50.02, "eval_accuracy": 0.7926267281105991, "eval_loss": 0.673189103603363, "eval_runtime": 263.3409, "eval_samples_per_second": 0.824, "eval_steps_per_second": 0.209, "step": 6018 }, { "epoch": 51.0, "learning_rate": 7.913896802785692e-06, "loss": 0.2915, "step": 6020 }, { "epoch": 51.0, "learning_rate": 7.834757834757835e-06, "loss": 0.4099, "step": 6030 }, { "epoch": 51.0, "learning_rate": 7.755618866729978e-06, "loss": 0.3578, "step": 6040 }, { "epoch": 51.0, "learning_rate": 7.67647989870212e-06, "loss": 0.6813, "step": 6050 }, { "epoch": 51.01, "learning_rate": 7.597340930674264e-06, "loss": 0.7868, "step": 6060 }, { "epoch": 51.01, "learning_rate": 7.5182019626464085e-06, "loss": 0.3421, "step": 6070 }, { "epoch": 51.01, "learning_rate": 7.439062994618551e-06, "loss": 0.3972, "step": 6080 }, { "epoch": 51.01, "learning_rate": 7.359924026590694e-06, "loss": 0.4223, "step": 6090 }, { "epoch": 51.01, "learning_rate": 7.280785058562836e-06, "loss": 0.4222, "step": 6100 }, { "epoch": 51.01, "learning_rate": 7.20164609053498e-06, "loss": 0.5646, "step": 6110 }, { "epoch": 51.01, "learning_rate": 7.122507122507123e-06, "loss": 0.5948, "step": 6120 }, { "epoch": 51.02, "learning_rate": 7.043368154479265e-06, "loss": 0.3058, "step": 6130 }, { "epoch": 51.02, "eval_accuracy": 0.8341013824884793, "eval_loss": 0.551148533821106, "eval_runtime": 267.39, "eval_samples_per_second": 0.812, "eval_steps_per_second": 0.206, "step": 6136 }, { "epoch": 52.0, "learning_rate": 6.964229186451408e-06, "loss": 0.4489, "step": 6140 }, { "epoch": 52.0, "learning_rate": 6.885090218423552e-06, "loss": 0.6678, "step": 6150 }, { "epoch": 52.0, "learning_rate": 6.805951250395695e-06, "loss": 0.4425, "step": 6160 }, { "epoch": 52.0, "learning_rate": 6.726812282367838e-06, "loss": 0.4, "step": 6170 }, { "epoch": 52.01, "learning_rate": 6.6476733143399815e-06, "loss": 0.6218, "step": 6180 }, { "epoch": 52.01, "learning_rate": 6.568534346312125e-06, "loss": 0.4908, "step": 6190 }, { "epoch": 52.01, "learning_rate": 6.489395378284268e-06, "loss": 0.4602, "step": 6200 }, { "epoch": 52.01, "learning_rate": 6.41025641025641e-06, "loss": 0.3036, "step": 6210 }, { "epoch": 52.01, "learning_rate": 6.331117442228554e-06, "loss": 0.229, "step": 6220 }, { "epoch": 52.01, "learning_rate": 6.251978474200697e-06, "loss": 0.5506, "step": 6230 }, { "epoch": 52.01, "learning_rate": 6.172839506172839e-06, "loss": 0.201, "step": 6240 }, { "epoch": 52.02, "learning_rate": 6.093700538144983e-06, "loss": 0.1286, "step": 6250 }, { "epoch": 52.02, "eval_accuracy": 0.8294930875576036, "eval_loss": 0.5877251029014587, "eval_runtime": 267.204, "eval_samples_per_second": 0.812, "eval_steps_per_second": 0.206, "step": 6254 }, { "epoch": 53.0, "learning_rate": 6.014561570117126e-06, "loss": 0.5248, "step": 6260 }, { "epoch": 53.0, "learning_rate": 5.935422602089269e-06, "loss": 0.5, "step": 6270 }, { "epoch": 53.0, "learning_rate": 5.856283634061412e-06, "loss": 0.484, "step": 6280 }, { "epoch": 53.01, "learning_rate": 5.777144666033555e-06, "loss": 0.4989, "step": 6290 }, { "epoch": 53.01, "learning_rate": 5.6980056980056985e-06, "loss": 0.71, "step": 6300 }, { "epoch": 53.01, "learning_rate": 5.618866729977841e-06, "loss": 0.8, "step": 6310 }, { "epoch": 53.01, "learning_rate": 5.539727761949985e-06, "loss": 0.5235, "step": 6320 }, { "epoch": 53.01, "learning_rate": 5.460588793922128e-06, "loss": 0.9647, "step": 6330 }, { "epoch": 53.01, "learning_rate": 5.381449825894271e-06, "loss": 0.3382, "step": 6340 }, { "epoch": 53.01, "learning_rate": 5.302310857866414e-06, "loss": 0.4008, "step": 6350 }, { "epoch": 53.02, "learning_rate": 5.223171889838557e-06, "loss": 0.5169, "step": 6360 }, { "epoch": 53.02, "learning_rate": 5.1440329218107e-06, "loss": 0.2013, "step": 6370 }, { "epoch": 53.02, "eval_accuracy": 0.815668202764977, "eval_loss": 0.6507552266120911, "eval_runtime": 274.5228, "eval_samples_per_second": 0.79, "eval_steps_per_second": 0.2, "step": 6372 }, { "epoch": 54.0, "learning_rate": 5.064893953782843e-06, "loss": 1.0097, "step": 6380 }, { "epoch": 54.0, "learning_rate": 4.985754985754986e-06, "loss": 0.2331, "step": 6390 }, { "epoch": 54.0, "learning_rate": 4.906616017727128e-06, "loss": 0.388, "step": 6400 }, { "epoch": 54.01, "learning_rate": 4.827477049699272e-06, "loss": 0.4358, "step": 6410 }, { "epoch": 54.01, "learning_rate": 4.748338081671415e-06, "loss": 0.4098, "step": 6420 }, { "epoch": 54.01, "learning_rate": 4.6691991136435585e-06, "loss": 0.3491, "step": 6430 }, { "epoch": 54.01, "learning_rate": 4.5900601456157015e-06, "loss": 0.2839, "step": 6440 }, { "epoch": 54.01, "learning_rate": 4.510921177587845e-06, "loss": 0.1628, "step": 6450 }, { "epoch": 54.01, "learning_rate": 4.431782209559988e-06, "loss": 1.0689, "step": 6460 }, { "epoch": 54.01, "learning_rate": 4.352643241532131e-06, "loss": 0.6193, "step": 6470 }, { "epoch": 54.02, "learning_rate": 4.273504273504274e-06, "loss": 0.7058, "step": 6480 }, { "epoch": 54.02, "learning_rate": 4.194365305476416e-06, "loss": 0.2027, "step": 6490 }, { "epoch": 54.02, "eval_accuracy": 0.815668202764977, "eval_loss": 0.6629670858383179, "eval_runtime": 274.2142, "eval_samples_per_second": 0.791, "eval_steps_per_second": 0.201, "step": 6490 }, { "epoch": 55.0, "learning_rate": 4.11522633744856e-06, "loss": 0.4227, "step": 6500 }, { "epoch": 55.0, "learning_rate": 4.036087369420703e-06, "loss": 0.8382, "step": 6510 }, { "epoch": 55.0, "learning_rate": 3.956948401392846e-06, "loss": 0.5188, "step": 6520 }, { "epoch": 55.01, "learning_rate": 3.877809433364989e-06, "loss": 0.4906, "step": 6530 }, { "epoch": 55.01, "learning_rate": 3.798670465337132e-06, "loss": 0.4729, "step": 6540 }, { "epoch": 55.01, "learning_rate": 3.7195314973092754e-06, "loss": 0.6123, "step": 6550 }, { "epoch": 55.01, "learning_rate": 3.640392529281418e-06, "loss": 0.5144, "step": 6560 }, { "epoch": 55.01, "learning_rate": 3.5612535612535615e-06, "loss": 0.863, "step": 6570 }, { "epoch": 55.01, "learning_rate": 3.482114593225704e-06, "loss": 0.4071, "step": 6580 }, { "epoch": 55.01, "learning_rate": 3.4029756251978477e-06, "loss": 0.5755, "step": 6590 }, { "epoch": 55.02, "learning_rate": 3.3238366571699908e-06, "loss": 0.6267, "step": 6600 }, { "epoch": 55.02, "eval_accuracy": 0.8064516129032258, "eval_loss": 0.7372620701789856, "eval_runtime": 267.3033, "eval_samples_per_second": 0.812, "eval_steps_per_second": 0.206, "step": 6608 }, { "epoch": 56.0, "learning_rate": 3.244697689142134e-06, "loss": 0.3038, "step": 6610 }, { "epoch": 56.0, "learning_rate": 3.165558721114277e-06, "loss": 0.6753, "step": 6620 }, { "epoch": 56.0, "learning_rate": 3.0864197530864196e-06, "loss": 0.4761, "step": 6630 }, { "epoch": 56.0, "learning_rate": 3.007280785058563e-06, "loss": 0.4697, "step": 6640 }, { "epoch": 56.01, "learning_rate": 2.928141817030706e-06, "loss": 0.2215, "step": 6650 }, { "epoch": 56.01, "learning_rate": 2.8490028490028492e-06, "loss": 0.4558, "step": 6660 }, { "epoch": 56.01, "learning_rate": 2.7698638809749923e-06, "loss": 0.7401, "step": 6670 }, { "epoch": 56.01, "learning_rate": 2.6907249129471354e-06, "loss": 0.5682, "step": 6680 }, { "epoch": 56.01, "learning_rate": 2.6115859449192785e-06, "loss": 0.5346, "step": 6690 }, { "epoch": 56.01, "learning_rate": 2.5324469768914215e-06, "loss": 0.7019, "step": 6700 }, { "epoch": 56.01, "learning_rate": 2.453308008863564e-06, "loss": 0.4813, "step": 6710 }, { "epoch": 56.02, "learning_rate": 2.3741690408357077e-06, "loss": 0.4561, "step": 6720 }, { "epoch": 56.02, "eval_accuracy": 0.8018433179723502, "eval_loss": 0.7382919192314148, "eval_runtime": 275.3448, "eval_samples_per_second": 0.788, "eval_steps_per_second": 0.2, "step": 6726 }, { "epoch": 57.0, "learning_rate": 2.2950300728078508e-06, "loss": 0.4686, "step": 6730 }, { "epoch": 57.0, "learning_rate": 2.215891104779994e-06, "loss": 0.3041, "step": 6740 }, { "epoch": 57.0, "learning_rate": 2.136752136752137e-06, "loss": 0.241, "step": 6750 }, { "epoch": 57.0, "learning_rate": 2.05761316872428e-06, "loss": 0.4126, "step": 6760 }, { "epoch": 57.01, "learning_rate": 1.978474200696423e-06, "loss": 0.286, "step": 6770 }, { "epoch": 57.01, "learning_rate": 1.899335232668566e-06, "loss": 0.582, "step": 6780 }, { "epoch": 57.01, "learning_rate": 1.820196264640709e-06, "loss": 0.3814, "step": 6790 }, { "epoch": 57.01, "learning_rate": 1.741057296612852e-06, "loss": 0.6788, "step": 6800 }, { "epoch": 57.01, "learning_rate": 1.6619183285849954e-06, "loss": 0.44, "step": 6810 }, { "epoch": 57.01, "learning_rate": 1.5827793605571385e-06, "loss": 0.4996, "step": 6820 }, { "epoch": 57.01, "learning_rate": 1.5036403925292815e-06, "loss": 0.2839, "step": 6830 }, { "epoch": 57.02, "learning_rate": 1.4245014245014246e-06, "loss": 0.7002, "step": 6840 }, { "epoch": 57.02, "eval_accuracy": 0.8110599078341014, "eval_loss": 0.7072679400444031, "eval_runtime": 266.757, "eval_samples_per_second": 0.813, "eval_steps_per_second": 0.206, "step": 6844 }, { "epoch": 58.0, "learning_rate": 1.3453624564735677e-06, "loss": 0.215, "step": 6850 }, { "epoch": 58.0, "learning_rate": 1.2662234884457108e-06, "loss": 0.6569, "step": 6860 }, { "epoch": 58.0, "learning_rate": 1.1870845204178538e-06, "loss": 0.6116, "step": 6870 }, { "epoch": 58.01, "learning_rate": 1.107945552389997e-06, "loss": 0.3416, "step": 6880 }, { "epoch": 58.01, "learning_rate": 1.02880658436214e-06, "loss": 0.571, "step": 6890 }, { "epoch": 58.01, "learning_rate": 9.49667616334283e-07, "loss": 0.6238, "step": 6900 }, { "epoch": 58.01, "learning_rate": 8.70528648306426e-07, "loss": 0.346, "step": 6910 }, { "epoch": 58.01, "learning_rate": 7.913896802785692e-07, "loss": 0.4527, "step": 6920 }, { "epoch": 58.01, "learning_rate": 7.122507122507123e-07, "loss": 0.5342, "step": 6930 }, { "epoch": 58.01, "learning_rate": 6.331117442228554e-07, "loss": 0.523, "step": 6940 }, { "epoch": 58.02, "learning_rate": 5.539727761949985e-07, "loss": 0.2861, "step": 6950 }, { "epoch": 58.02, "learning_rate": 4.748338081671415e-07, "loss": 0.1823, "step": 6960 }, { "epoch": 58.02, "eval_accuracy": 0.8202764976958525, "eval_loss": 0.6870871186256409, "eval_runtime": 266.4451, "eval_samples_per_second": 0.814, "eval_steps_per_second": 0.206, "step": 6962 }, { "epoch": 59.0, "learning_rate": 3.956948401392846e-07, "loss": 0.5552, "step": 6970 }, { "epoch": 59.0, "learning_rate": 3.165558721114277e-07, "loss": 0.4444, "step": 6980 }, { "epoch": 59.0, "learning_rate": 2.3741690408357074e-07, "loss": 0.4752, "step": 6990 }, { "epoch": 59.01, "learning_rate": 1.5827793605571385e-07, "loss": 0.176, "step": 7000 }, { "epoch": 59.01, "learning_rate": 7.913896802785692e-08, "loss": 0.8408, "step": 7010 }, { "epoch": 59.01, "learning_rate": 0.0, "loss": 0.2439, "step": 7020 }, { "epoch": 59.01, "eval_accuracy": 0.8202764976958525, "eval_loss": 0.690118670463562, "eval_runtime": 307.1421, "eval_samples_per_second": 0.707, "eval_steps_per_second": 0.179, "step": 7020 }, { "epoch": 59.01, "step": 7020, "total_flos": 3.476987046291161e+19, "train_loss": 0.6254725841715125, "train_runtime": 65940.1985, "train_samples_per_second": 0.426, "train_steps_per_second": 0.106 }, { "epoch": 59.01, "eval_accuracy": 0.6990740740740741, "eval_loss": 1.074745774269104, "eval_runtime": 265.3059, "eval_samples_per_second": 0.814, "eval_steps_per_second": 0.204, "step": 7020 }, { "epoch": 59.01, "eval_accuracy": 0.6990740740740741, "eval_loss": 1.074745774269104, "eval_runtime": 283.9645, "eval_samples_per_second": 0.761, "eval_steps_per_second": 0.19, "step": 7020 }, { "epoch": 59.01, "eval_accuracy": 0.6990740740740741, "eval_loss": 1.0747456550598145, "eval_runtime": 273.0941, "eval_samples_per_second": 0.791, "eval_steps_per_second": 0.198, "step": 7020 }, { "epoch": 59.01, "eval_accuracy": 0.6990740740740741, "eval_loss": 1.074745774269104, "eval_runtime": 320.3996, "eval_samples_per_second": 0.674, "eval_steps_per_second": 0.169, "step": 7020 } ], "logging_steps": 10, "max_steps": 7020, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 3.476987046291161e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }