{ "best_metric": 0.8068181818181818, "best_model_checkpoint": "/Modelos/Swin-DA2-final-AMD-Wet\\checkpoint-651", "epoch": 77.41935483870968, "eval_steps": 500, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.65, "learning_rate": 4.166666666666667e-06, "loss": 1.6093, "step": 10 }, { "epoch": 0.97, "eval_accuracy": 0.20454545454545456, "eval_loss": 1.6090493202209473, "eval_runtime": 1.6312, "eval_samples_per_second": 53.948, "eval_steps_per_second": 3.678, "step": 15 }, { "epoch": 1.29, "learning_rate": 8.333333333333334e-06, "loss": 1.6084, "step": 20 }, { "epoch": 1.94, "learning_rate": 1.25e-05, "loss": 1.6042, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.20454545454545456, "eval_loss": 1.605498194694519, "eval_runtime": 1.0762, "eval_samples_per_second": 81.766, "eval_steps_per_second": 5.575, "step": 31 }, { "epoch": 2.58, "learning_rate": 1.6666666666666667e-05, "loss": 1.586, "step": 40 }, { "epoch": 2.97, "eval_accuracy": 0.19318181818181818, "eval_loss": 1.5730183124542236, "eval_runtime": 1.0502, "eval_samples_per_second": 83.791, "eval_steps_per_second": 5.713, "step": 46 }, { "epoch": 3.23, "learning_rate": 2.0833333333333336e-05, "loss": 1.5425, "step": 50 }, { "epoch": 3.87, "learning_rate": 2.5e-05, "loss": 1.4855, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.26136363636363635, "eval_loss": 1.4935040473937988, "eval_runtime": 1.0352, "eval_samples_per_second": 85.005, "eval_steps_per_second": 5.796, "step": 62 }, { "epoch": 4.52, "learning_rate": 2.916666666666667e-05, "loss": 1.3738, "step": 70 }, { "epoch": 4.97, "eval_accuracy": 0.5, "eval_loss": 1.3272513151168823, "eval_runtime": 1.0512, "eval_samples_per_second": 83.711, "eval_steps_per_second": 5.708, "step": 77 }, { "epoch": 5.16, "learning_rate": 3.3333333333333335e-05, "loss": 1.2795, "step": 80 }, { "epoch": 5.81, "learning_rate": 3.7500000000000003e-05, "loss": 1.1557, "step": 90 }, { "epoch": 6.0, "eval_accuracy": 0.625, "eval_loss": 1.0827698707580566, "eval_runtime": 1.0852, "eval_samples_per_second": 81.088, "eval_steps_per_second": 5.529, "step": 93 }, { "epoch": 6.45, "learning_rate": 4.166666666666667e-05, "loss": 1.0441, "step": 100 }, { "epoch": 6.97, "eval_accuracy": 0.6022727272727273, "eval_loss": 0.8971154689788818, "eval_runtime": 1.0662, "eval_samples_per_second": 82.533, "eval_steps_per_second": 5.627, "step": 108 }, { "epoch": 7.1, "learning_rate": 4.5833333333333334e-05, "loss": 0.9639, "step": 110 }, { "epoch": 7.74, "learning_rate": 5e-05, "loss": 0.9358, "step": 120 }, { "epoch": 8.0, "eval_accuracy": 0.6818181818181818, "eval_loss": 0.7635229229927063, "eval_runtime": 1.0302, "eval_samples_per_second": 85.418, "eval_steps_per_second": 5.824, "step": 124 }, { "epoch": 8.39, "learning_rate": 4.9537037037037035e-05, "loss": 0.7707, "step": 130 }, { "epoch": 8.97, "eval_accuracy": 0.6477272727272727, "eval_loss": 0.8717986941337585, "eval_runtime": 1.0472, "eval_samples_per_second": 84.031, "eval_steps_per_second": 5.729, "step": 139 }, { "epoch": 9.03, "learning_rate": 4.9074074074074075e-05, "loss": 0.8861, "step": 140 }, { "epoch": 9.68, "learning_rate": 4.8611111111111115e-05, "loss": 0.7161, "step": 150 }, { "epoch": 10.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 0.6902639269828796, "eval_runtime": 1.0472, "eval_samples_per_second": 84.031, "eval_steps_per_second": 5.729, "step": 155 }, { "epoch": 10.32, "learning_rate": 4.814814814814815e-05, "loss": 0.7251, "step": 160 }, { "epoch": 10.97, "learning_rate": 4.768518518518519e-05, "loss": 0.6799, "step": 170 }, { "epoch": 10.97, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.8108208179473877, "eval_runtime": 1.0902, "eval_samples_per_second": 80.716, "eval_steps_per_second": 5.503, "step": 170 }, { "epoch": 11.61, "learning_rate": 4.722222222222222e-05, "loss": 0.6402, "step": 180 }, { "epoch": 12.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.7146868109703064, "eval_runtime": 1.0392, "eval_samples_per_second": 84.678, "eval_steps_per_second": 5.773, "step": 186 }, { "epoch": 12.26, "learning_rate": 4.675925925925926e-05, "loss": 0.5816, "step": 190 }, { "epoch": 12.9, "learning_rate": 4.62962962962963e-05, "loss": 0.5131, "step": 200 }, { "epoch": 12.97, "eval_accuracy": 0.75, "eval_loss": 0.7520684599876404, "eval_runtime": 1.1293, "eval_samples_per_second": 77.928, "eval_steps_per_second": 5.313, "step": 201 }, { "epoch": 13.55, "learning_rate": 4.5833333333333334e-05, "loss": 0.5262, "step": 210 }, { "epoch": 14.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.7967298626899719, "eval_runtime": 1.0822, "eval_samples_per_second": 81.313, "eval_steps_per_second": 5.544, "step": 217 }, { "epoch": 14.19, "learning_rate": 4.5370370370370374e-05, "loss": 0.5323, "step": 220 }, { "epoch": 14.84, "learning_rate": 4.490740740740741e-05, "loss": 0.4757, "step": 230 }, { "epoch": 14.97, "eval_accuracy": 0.7613636363636364, "eval_loss": 0.7084075808525085, "eval_runtime": 1.0422, "eval_samples_per_second": 84.434, "eval_steps_per_second": 5.757, "step": 232 }, { "epoch": 15.48, "learning_rate": 4.4444444444444447e-05, "loss": 0.4758, "step": 240 }, { "epoch": 16.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 0.7528924345970154, "eval_runtime": 1.0702, "eval_samples_per_second": 82.224, "eval_steps_per_second": 5.606, "step": 248 }, { "epoch": 16.13, "learning_rate": 4.3981481481481486e-05, "loss": 0.4203, "step": 250 }, { "epoch": 16.77, "learning_rate": 4.351851851851852e-05, "loss": 0.4184, "step": 260 }, { "epoch": 16.97, "eval_accuracy": 0.7727272727272727, "eval_loss": 0.7768625020980835, "eval_runtime": 1.0432, "eval_samples_per_second": 84.353, "eval_steps_per_second": 5.751, "step": 263 }, { "epoch": 17.42, "learning_rate": 4.305555555555556e-05, "loss": 0.398, "step": 270 }, { "epoch": 18.0, "eval_accuracy": 0.7386363636363636, "eval_loss": 0.8496245741844177, "eval_runtime": 1.0792, "eval_samples_per_second": 81.539, "eval_steps_per_second": 5.559, "step": 279 }, { "epoch": 18.06, "learning_rate": 4.259259259259259e-05, "loss": 0.3623, "step": 280 }, { "epoch": 18.71, "learning_rate": 4.212962962962963e-05, "loss": 0.3591, "step": 290 }, { "epoch": 18.97, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.8204294443130493, "eval_runtime": 1.0712, "eval_samples_per_second": 82.148, "eval_steps_per_second": 5.601, "step": 294 }, { "epoch": 19.35, "learning_rate": 4.166666666666667e-05, "loss": 0.3449, "step": 300 }, { "epoch": 20.0, "learning_rate": 4.1203703703703705e-05, "loss": 0.3536, "step": 310 }, { "epoch": 20.0, "eval_accuracy": 0.7613636363636364, "eval_loss": 0.8589130640029907, "eval_runtime": 1.0792, "eval_samples_per_second": 81.539, "eval_steps_per_second": 5.559, "step": 310 }, { "epoch": 20.65, "learning_rate": 4.074074074074074e-05, "loss": 0.2589, "step": 320 }, { "epoch": 20.97, "eval_accuracy": 0.7045454545454546, "eval_loss": 0.9754377007484436, "eval_runtime": 1.0472, "eval_samples_per_second": 84.031, "eval_steps_per_second": 5.729, "step": 325 }, { "epoch": 21.29, "learning_rate": 4.027777777777778e-05, "loss": 0.286, "step": 330 }, { "epoch": 21.94, "learning_rate": 3.981481481481482e-05, "loss": 0.3218, "step": 340 }, { "epoch": 22.0, "eval_accuracy": 0.7159090909090909, "eval_loss": 1.0231382846832275, "eval_runtime": 1.0582, "eval_samples_per_second": 83.157, "eval_steps_per_second": 5.67, "step": 341 }, { "epoch": 22.58, "learning_rate": 3.935185185185186e-05, "loss": 0.3151, "step": 350 }, { "epoch": 22.97, "eval_accuracy": 0.7386363636363636, "eval_loss": 0.9173128604888916, "eval_runtime": 1.0472, "eval_samples_per_second": 84.031, "eval_steps_per_second": 5.729, "step": 356 }, { "epoch": 23.23, "learning_rate": 3.888888888888889e-05, "loss": 0.3135, "step": 360 }, { "epoch": 23.87, "learning_rate": 3.8425925925925924e-05, "loss": 0.2708, "step": 370 }, { "epoch": 24.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.9598014950752258, "eval_runtime": 1.0532, "eval_samples_per_second": 83.552, "eval_steps_per_second": 5.697, "step": 372 }, { "epoch": 24.52, "learning_rate": 3.7962962962962964e-05, "loss": 0.2802, "step": 380 }, { "epoch": 24.97, "eval_accuracy": 0.7386363636363636, "eval_loss": 0.904956579208374, "eval_runtime": 1.0442, "eval_samples_per_second": 84.272, "eval_steps_per_second": 5.746, "step": 387 }, { "epoch": 25.16, "learning_rate": 3.7500000000000003e-05, "loss": 0.2795, "step": 390 }, { "epoch": 25.81, "learning_rate": 3.7037037037037037e-05, "loss": 0.3114, "step": 400 }, { "epoch": 26.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 0.8725055456161499, "eval_runtime": 1.0662, "eval_samples_per_second": 82.533, "eval_steps_per_second": 5.627, "step": 403 }, { "epoch": 26.45, "learning_rate": 3.6574074074074076e-05, "loss": 0.2794, "step": 410 }, { "epoch": 26.97, "eval_accuracy": 0.7386363636363636, "eval_loss": 0.9578754901885986, "eval_runtime": 1.0662, "eval_samples_per_second": 82.533, "eval_steps_per_second": 5.627, "step": 418 }, { "epoch": 27.1, "learning_rate": 3.611111111111111e-05, "loss": 0.2289, "step": 420 }, { "epoch": 27.74, "learning_rate": 3.564814814814815e-05, "loss": 0.26, "step": 430 }, { "epoch": 28.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 1.0064443349838257, "eval_runtime": 1.0862, "eval_samples_per_second": 81.013, "eval_steps_per_second": 5.524, "step": 434 }, { "epoch": 28.39, "learning_rate": 3.518518518518519e-05, "loss": 0.2961, "step": 440 }, { "epoch": 28.97, "eval_accuracy": 0.75, "eval_loss": 1.1055842638015747, "eval_runtime": 1.1363, "eval_samples_per_second": 77.447, "eval_steps_per_second": 5.281, "step": 449 }, { "epoch": 29.03, "learning_rate": 3.472222222222222e-05, "loss": 0.2547, "step": 450 }, { "epoch": 29.68, "learning_rate": 3.425925925925926e-05, "loss": 0.297, "step": 460 }, { "epoch": 30.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 0.8761419653892517, "eval_runtime": 1.1092, "eval_samples_per_second": 79.333, "eval_steps_per_second": 5.409, "step": 465 }, { "epoch": 30.32, "learning_rate": 3.3796296296296295e-05, "loss": 0.2583, "step": 470 }, { "epoch": 30.97, "learning_rate": 3.3333333333333335e-05, "loss": 0.2044, "step": 480 }, { "epoch": 30.97, "eval_accuracy": 0.7613636363636364, "eval_loss": 1.0461256504058838, "eval_runtime": 1.1173, "eval_samples_per_second": 78.765, "eval_steps_per_second": 5.37, "step": 480 }, { "epoch": 31.61, "learning_rate": 3.2870370370370375e-05, "loss": 0.1884, "step": 490 }, { "epoch": 32.0, "eval_accuracy": 0.75, "eval_loss": 0.9888725280761719, "eval_runtime": 1.0972, "eval_samples_per_second": 80.201, "eval_steps_per_second": 5.468, "step": 496 }, { "epoch": 32.26, "learning_rate": 3.240740740740741e-05, "loss": 0.1857, "step": 500 }, { "epoch": 32.9, "learning_rate": 3.194444444444444e-05, "loss": 0.2156, "step": 510 }, { "epoch": 32.97, "eval_accuracy": 0.7727272727272727, "eval_loss": 1.0186409950256348, "eval_runtime": 1.1423, "eval_samples_per_second": 77.04, "eval_steps_per_second": 5.253, "step": 511 }, { "epoch": 33.55, "learning_rate": 3.148148148148148e-05, "loss": 0.194, "step": 520 }, { "epoch": 34.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 1.0899921655654907, "eval_runtime": 1.1843, "eval_samples_per_second": 74.308, "eval_steps_per_second": 5.066, "step": 527 }, { "epoch": 34.19, "learning_rate": 3.101851851851852e-05, "loss": 0.181, "step": 530 }, { "epoch": 34.84, "learning_rate": 3.055555555555556e-05, "loss": 0.2085, "step": 540 }, { "epoch": 34.97, "eval_accuracy": 0.75, "eval_loss": 1.0762078762054443, "eval_runtime": 1.1223, "eval_samples_per_second": 78.414, "eval_steps_per_second": 5.346, "step": 542 }, { "epoch": 35.48, "learning_rate": 3.0092592592592593e-05, "loss": 0.1909, "step": 550 }, { "epoch": 36.0, "eval_accuracy": 0.7840909090909091, "eval_loss": 1.0324878692626953, "eval_runtime": 1.1523, "eval_samples_per_second": 76.372, "eval_steps_per_second": 5.207, "step": 558 }, { "epoch": 36.13, "learning_rate": 2.962962962962963e-05, "loss": 0.1864, "step": 560 }, { "epoch": 36.77, "learning_rate": 2.916666666666667e-05, "loss": 0.1551, "step": 570 }, { "epoch": 36.97, "eval_accuracy": 0.7045454545454546, "eval_loss": 1.149728775024414, "eval_runtime": 1.1653, "eval_samples_per_second": 75.519, "eval_steps_per_second": 5.149, "step": 573 }, { "epoch": 37.42, "learning_rate": 2.8703703703703706e-05, "loss": 0.2106, "step": 580 }, { "epoch": 38.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 1.0304036140441895, "eval_runtime": 1.1102, "eval_samples_per_second": 79.261, "eval_steps_per_second": 5.404, "step": 589 }, { "epoch": 38.06, "learning_rate": 2.824074074074074e-05, "loss": 0.1922, "step": 590 }, { "epoch": 38.71, "learning_rate": 2.777777777777778e-05, "loss": 0.1771, "step": 600 }, { "epoch": 38.97, "eval_accuracy": 0.7840909090909091, "eval_loss": 1.0794123411178589, "eval_runtime": 1.1483, "eval_samples_per_second": 76.638, "eval_steps_per_second": 5.225, "step": 604 }, { "epoch": 39.35, "learning_rate": 2.7314814814814816e-05, "loss": 0.2051, "step": 610 }, { "epoch": 40.0, "learning_rate": 2.6851851851851855e-05, "loss": 0.1567, "step": 620 }, { "epoch": 40.0, "eval_accuracy": 0.7954545454545454, "eval_loss": 1.0634194612503052, "eval_runtime": 1.1643, "eval_samples_per_second": 75.584, "eval_steps_per_second": 5.153, "step": 620 }, { "epoch": 40.65, "learning_rate": 2.6388888888888892e-05, "loss": 0.1856, "step": 630 }, { "epoch": 40.97, "eval_accuracy": 0.7613636363636364, "eval_loss": 1.0716369152069092, "eval_runtime": 1.1173, "eval_samples_per_second": 78.765, "eval_steps_per_second": 5.37, "step": 635 }, { "epoch": 41.29, "learning_rate": 2.5925925925925925e-05, "loss": 0.1765, "step": 640 }, { "epoch": 41.94, "learning_rate": 2.5462962962962965e-05, "loss": 0.185, "step": 650 }, { "epoch": 42.0, "eval_accuracy": 0.8068181818181818, "eval_loss": 1.0321924686431885, "eval_runtime": 1.1433, "eval_samples_per_second": 76.973, "eval_steps_per_second": 5.248, "step": 651 }, { "epoch": 42.58, "learning_rate": 2.5e-05, "loss": 0.1239, "step": 660 }, { "epoch": 42.97, "eval_accuracy": 0.7613636363636364, "eval_loss": 1.1515936851501465, "eval_runtime": 1.1523, "eval_samples_per_second": 76.372, "eval_steps_per_second": 5.207, "step": 666 }, { "epoch": 43.23, "learning_rate": 2.4537037037037038e-05, "loss": 0.2364, "step": 670 }, { "epoch": 43.87, "learning_rate": 2.4074074074074074e-05, "loss": 0.1617, "step": 680 }, { "epoch": 44.0, "eval_accuracy": 0.7840909090909091, "eval_loss": 1.0321751832962036, "eval_runtime": 1.1253, "eval_samples_per_second": 78.205, "eval_steps_per_second": 5.332, "step": 682 }, { "epoch": 44.52, "learning_rate": 2.361111111111111e-05, "loss": 0.1221, "step": 690 }, { "epoch": 44.97, "eval_accuracy": 0.8068181818181818, "eval_loss": 1.0553076267242432, "eval_runtime": 1.1333, "eval_samples_per_second": 77.652, "eval_steps_per_second": 5.294, "step": 697 }, { "epoch": 45.16, "learning_rate": 2.314814814814815e-05, "loss": 0.1953, "step": 700 }, { "epoch": 45.81, "learning_rate": 2.2685185185185187e-05, "loss": 0.1433, "step": 710 }, { "epoch": 46.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 1.0596939325332642, "eval_runtime": 1.1092, "eval_samples_per_second": 79.333, "eval_steps_per_second": 5.409, "step": 713 }, { "epoch": 46.45, "learning_rate": 2.2222222222222223e-05, "loss": 0.216, "step": 720 }, { "epoch": 46.97, "eval_accuracy": 0.75, "eval_loss": 1.1585843563079834, "eval_runtime": 1.1223, "eval_samples_per_second": 78.414, "eval_steps_per_second": 5.346, "step": 728 }, { "epoch": 47.1, "learning_rate": 2.175925925925926e-05, "loss": 0.17, "step": 730 }, { "epoch": 47.74, "learning_rate": 2.1296296296296296e-05, "loss": 0.1807, "step": 740 }, { "epoch": 48.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 1.0872578620910645, "eval_runtime": 1.1243, "eval_samples_per_second": 78.274, "eval_steps_per_second": 5.337, "step": 744 }, { "epoch": 48.39, "learning_rate": 2.0833333333333336e-05, "loss": 0.185, "step": 750 }, { "epoch": 48.97, "eval_accuracy": 0.7727272727272727, "eval_loss": 1.2490220069885254, "eval_runtime": 1.1133, "eval_samples_per_second": 79.048, "eval_steps_per_second": 5.39, "step": 759 }, { "epoch": 49.03, "learning_rate": 2.037037037037037e-05, "loss": 0.1114, "step": 760 }, { "epoch": 49.68, "learning_rate": 1.990740740740741e-05, "loss": 0.1554, "step": 770 }, { "epoch": 50.0, "eval_accuracy": 0.7613636363636364, "eval_loss": 1.222277045249939, "eval_runtime": 1.2013, "eval_samples_per_second": 73.256, "eval_steps_per_second": 4.995, "step": 775 }, { "epoch": 50.32, "learning_rate": 1.9444444444444445e-05, "loss": 0.1378, "step": 780 }, { "epoch": 50.97, "learning_rate": 1.8981481481481482e-05, "loss": 0.1359, "step": 790 }, { "epoch": 50.97, "eval_accuracy": 0.75, "eval_loss": 1.2345318794250488, "eval_runtime": 1.1193, "eval_samples_per_second": 78.624, "eval_steps_per_second": 5.361, "step": 790 }, { "epoch": 51.61, "learning_rate": 1.8518518518518518e-05, "loss": 0.0929, "step": 800 }, { "epoch": 52.0, "eval_accuracy": 0.7613636363636364, "eval_loss": 1.1833394765853882, "eval_runtime": 1.1123, "eval_samples_per_second": 79.119, "eval_steps_per_second": 5.394, "step": 806 }, { "epoch": 52.26, "learning_rate": 1.8055555555555555e-05, "loss": 0.1513, "step": 810 }, { "epoch": 52.9, "learning_rate": 1.7592592592592595e-05, "loss": 0.1379, "step": 820 }, { "epoch": 52.97, "eval_accuracy": 0.7386363636363636, "eval_loss": 1.2581446170806885, "eval_runtime": 1.1193, "eval_samples_per_second": 78.624, "eval_steps_per_second": 5.361, "step": 821 }, { "epoch": 53.55, "learning_rate": 1.712962962962963e-05, "loss": 0.145, "step": 830 }, { "epoch": 54.0, "eval_accuracy": 0.75, "eval_loss": 1.3023442029953003, "eval_runtime": 1.1203, "eval_samples_per_second": 78.554, "eval_steps_per_second": 5.356, "step": 837 }, { "epoch": 54.19, "learning_rate": 1.6666666666666667e-05, "loss": 0.1646, "step": 840 }, { "epoch": 54.84, "learning_rate": 1.6203703703703704e-05, "loss": 0.134, "step": 850 }, { "epoch": 54.97, "eval_accuracy": 0.75, "eval_loss": 1.2468658685684204, "eval_runtime": 1.1072, "eval_samples_per_second": 79.476, "eval_steps_per_second": 5.419, "step": 852 }, { "epoch": 55.48, "learning_rate": 1.574074074074074e-05, "loss": 0.1974, "step": 860 }, { "epoch": 56.0, "eval_accuracy": 0.7386363636363636, "eval_loss": 1.2670570611953735, "eval_runtime": 1.1002, "eval_samples_per_second": 79.982, "eval_steps_per_second": 5.453, "step": 868 }, { "epoch": 56.13, "learning_rate": 1.527777777777778e-05, "loss": 0.1158, "step": 870 }, { "epoch": 56.77, "learning_rate": 1.4814814814814815e-05, "loss": 0.122, "step": 880 }, { "epoch": 56.97, "eval_accuracy": 0.7272727272727273, "eval_loss": 1.267562747001648, "eval_runtime": 1.0852, "eval_samples_per_second": 81.088, "eval_steps_per_second": 5.529, "step": 883 }, { "epoch": 57.42, "learning_rate": 1.4351851851851853e-05, "loss": 0.1487, "step": 890 }, { "epoch": 58.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 1.2845935821533203, "eval_runtime": 1.1173, "eval_samples_per_second": 78.765, "eval_steps_per_second": 5.37, "step": 899 }, { "epoch": 58.06, "learning_rate": 1.388888888888889e-05, "loss": 0.1473, "step": 900 }, { "epoch": 58.71, "learning_rate": 1.3425925925925928e-05, "loss": 0.1282, "step": 910 }, { "epoch": 58.97, "eval_accuracy": 0.75, "eval_loss": 1.1780353784561157, "eval_runtime": 1.0612, "eval_samples_per_second": 82.922, "eval_steps_per_second": 5.654, "step": 914 }, { "epoch": 59.35, "learning_rate": 1.2962962962962962e-05, "loss": 0.1006, "step": 920 }, { "epoch": 60.0, "learning_rate": 1.25e-05, "loss": 0.0989, "step": 930 }, { "epoch": 60.0, "eval_accuracy": 0.75, "eval_loss": 1.231979489326477, "eval_runtime": 1.0982, "eval_samples_per_second": 80.128, "eval_steps_per_second": 5.463, "step": 930 }, { "epoch": 60.65, "learning_rate": 1.2037037037037037e-05, "loss": 0.0997, "step": 940 }, { "epoch": 60.97, "eval_accuracy": 0.7386363636363636, "eval_loss": 1.2791739702224731, "eval_runtime": 1.1663, "eval_samples_per_second": 75.455, "eval_steps_per_second": 5.145, "step": 945 }, { "epoch": 61.29, "learning_rate": 1.1574074074074075e-05, "loss": 0.1234, "step": 950 }, { "epoch": 61.94, "learning_rate": 1.1111111111111112e-05, "loss": 0.1058, "step": 960 }, { "epoch": 62.0, "eval_accuracy": 0.7613636363636364, "eval_loss": 1.2126153707504272, "eval_runtime": 1.1072, "eval_samples_per_second": 79.476, "eval_steps_per_second": 5.419, "step": 961 }, { "epoch": 62.58, "learning_rate": 1.0648148148148148e-05, "loss": 0.1105, "step": 970 }, { "epoch": 62.97, "eval_accuracy": 0.7386363636363636, "eval_loss": 1.256054401397705, "eval_runtime": 1.0872, "eval_samples_per_second": 80.939, "eval_steps_per_second": 5.519, "step": 976 }, { "epoch": 63.23, "learning_rate": 1.0185185185185185e-05, "loss": 0.1254, "step": 980 }, { "epoch": 63.87, "learning_rate": 9.722222222222223e-06, "loss": 0.0957, "step": 990 }, { "epoch": 64.0, "eval_accuracy": 0.7613636363636364, "eval_loss": 1.1701573133468628, "eval_runtime": 1.1323, "eval_samples_per_second": 77.721, "eval_steps_per_second": 5.299, "step": 992 }, { "epoch": 64.52, "learning_rate": 9.259259259259259e-06, "loss": 0.1326, "step": 1000 }, { "epoch": 64.97, "eval_accuracy": 0.75, "eval_loss": 1.1838548183441162, "eval_runtime": 1.1042, "eval_samples_per_second": 79.692, "eval_steps_per_second": 5.434, "step": 1007 }, { "epoch": 65.16, "learning_rate": 8.796296296296297e-06, "loss": 0.1424, "step": 1010 }, { "epoch": 65.81, "learning_rate": 8.333333333333334e-06, "loss": 0.0838, "step": 1020 }, { "epoch": 66.0, "eval_accuracy": 0.7386363636363636, "eval_loss": 1.2728073596954346, "eval_runtime": 1.1643, "eval_samples_per_second": 75.584, "eval_steps_per_second": 5.153, "step": 1023 }, { "epoch": 66.45, "learning_rate": 7.87037037037037e-06, "loss": 0.1163, "step": 1030 }, { "epoch": 66.97, "eval_accuracy": 0.75, "eval_loss": 1.2736155986785889, "eval_runtime": 1.1353, "eval_samples_per_second": 77.516, "eval_steps_per_second": 5.285, "step": 1038 }, { "epoch": 67.1, "learning_rate": 7.4074074074074075e-06, "loss": 0.1363, "step": 1040 }, { "epoch": 67.74, "learning_rate": 6.944444444444445e-06, "loss": 0.0926, "step": 1050 }, { "epoch": 68.0, "eval_accuracy": 0.75, "eval_loss": 1.2641026973724365, "eval_runtime": 1.1493, "eval_samples_per_second": 76.571, "eval_steps_per_second": 5.221, "step": 1054 }, { "epoch": 68.39, "learning_rate": 6.481481481481481e-06, "loss": 0.102, "step": 1060 }, { "epoch": 68.97, "eval_accuracy": 0.75, "eval_loss": 1.3310309648513794, "eval_runtime": 1.1102, "eval_samples_per_second": 79.261, "eval_steps_per_second": 5.404, "step": 1069 }, { "epoch": 69.03, "learning_rate": 6.0185185185185185e-06, "loss": 0.1224, "step": 1070 }, { "epoch": 69.68, "learning_rate": 5.555555555555556e-06, "loss": 0.0996, "step": 1080 }, { "epoch": 70.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 1.3119887113571167, "eval_runtime": 1.1012, "eval_samples_per_second": 79.909, "eval_steps_per_second": 5.448, "step": 1085 }, { "epoch": 70.32, "learning_rate": 5.092592592592592e-06, "loss": 0.1182, "step": 1090 }, { "epoch": 70.97, "learning_rate": 4.6296296296296296e-06, "loss": 0.081, "step": 1100 }, { "epoch": 70.97, "eval_accuracy": 0.7272727272727273, "eval_loss": 1.3357912302017212, "eval_runtime": 1.1133, "eval_samples_per_second": 79.048, "eval_steps_per_second": 5.39, "step": 1100 }, { "epoch": 71.61, "learning_rate": 4.166666666666667e-06, "loss": 0.1305, "step": 1110 }, { "epoch": 72.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 1.3439818620681763, "eval_runtime": 1.1243, "eval_samples_per_second": 78.274, "eval_steps_per_second": 5.337, "step": 1116 }, { "epoch": 72.26, "learning_rate": 3.7037037037037037e-06, "loss": 0.1309, "step": 1120 }, { "epoch": 72.9, "learning_rate": 3.2407407407407406e-06, "loss": 0.1131, "step": 1130 }, { "epoch": 72.97, "eval_accuracy": 0.7272727272727273, "eval_loss": 1.3126434087753296, "eval_runtime": 1.1203, "eval_samples_per_second": 78.554, "eval_steps_per_second": 5.356, "step": 1131 }, { "epoch": 73.55, "learning_rate": 2.777777777777778e-06, "loss": 0.0883, "step": 1140 }, { "epoch": 74.0, "eval_accuracy": 0.7386363636363636, "eval_loss": 1.284845232963562, "eval_runtime": 1.0972, "eval_samples_per_second": 80.201, "eval_steps_per_second": 5.468, "step": 1147 }, { "epoch": 74.19, "learning_rate": 2.3148148148148148e-06, "loss": 0.1135, "step": 1150 }, { "epoch": 74.84, "learning_rate": 1.8518518518518519e-06, "loss": 0.0873, "step": 1160 }, { "epoch": 74.97, "eval_accuracy": 0.7386363636363636, "eval_loss": 1.2801809310913086, "eval_runtime": 1.1313, "eval_samples_per_second": 77.79, "eval_steps_per_second": 5.304, "step": 1162 }, { "epoch": 75.48, "learning_rate": 1.388888888888889e-06, "loss": 0.0991, "step": 1170 }, { "epoch": 76.0, "eval_accuracy": 0.75, "eval_loss": 1.2710990905761719, "eval_runtime": 1.1643, "eval_samples_per_second": 75.584, "eval_steps_per_second": 5.153, "step": 1178 }, { "epoch": 76.13, "learning_rate": 9.259259259259259e-07, "loss": 0.0946, "step": 1180 }, { "epoch": 76.77, "learning_rate": 4.6296296296296297e-07, "loss": 0.0881, "step": 1190 }, { "epoch": 76.97, "eval_accuracy": 0.75, "eval_loss": 1.2746005058288574, "eval_runtime": 1.4135, "eval_samples_per_second": 62.256, "eval_steps_per_second": 4.245, "step": 1193 }, { "epoch": 77.42, "learning_rate": 0.0, "loss": 0.0895, "step": 1200 }, { "epoch": 77.42, "eval_accuracy": 0.75, "eval_loss": 1.275152325630188, "eval_runtime": 1.1543, "eval_samples_per_second": 76.239, "eval_steps_per_second": 5.198, "step": 1200 }, { "epoch": 77.42, "step": 1200, "total_flos": 2.4687812024881644e+18, "train_loss": 0.34467112536231675, "train_runtime": 2690.4819, "train_samples_per_second": 29.14, "train_steps_per_second": 0.446 } ], "logging_steps": 10, "max_steps": 1200, "num_input_tokens_seen": 0, "num_train_epochs": 80, "save_steps": 500, "total_flos": 2.4687812024881644e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }