{ "best_metric": 0.9811772758384668, "best_model_checkpoint": "swin-tiny-patch4-window7-224-spa_saloon_classification/checkpoint-1849", "epoch": 9.975669099756692, "eval_steps": 500, "global_step": 2050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.2195121951219514e-06, "loss": 1.8606, "step": 5 }, { "epoch": 0.05, "learning_rate": 2.4390243902439027e-06, "loss": 1.8294, "step": 10 }, { "epoch": 0.07, "learning_rate": 3.6585365853658537e-06, "loss": 1.8047, "step": 15 }, { "epoch": 0.1, "learning_rate": 4.8780487804878055e-06, "loss": 1.7415, "step": 20 }, { "epoch": 0.12, "learning_rate": 6.0975609756097564e-06, "loss": 1.6899, "step": 25 }, { "epoch": 0.15, "learning_rate": 7.317073170731707e-06, "loss": 1.6166, "step": 30 }, { "epoch": 0.17, "learning_rate": 8.53658536585366e-06, "loss": 1.518, "step": 35 }, { "epoch": 0.19, "learning_rate": 9.756097560975611e-06, "loss": 1.4472, "step": 40 }, { "epoch": 0.22, "learning_rate": 1.0975609756097562e-05, "loss": 1.2949, "step": 45 }, { "epoch": 0.24, "learning_rate": 1.2195121951219513e-05, "loss": 1.1811, "step": 50 }, { "epoch": 0.27, "learning_rate": 1.3414634146341466e-05, "loss": 1.0548, "step": 55 }, { "epoch": 0.29, "learning_rate": 1.4634146341463415e-05, "loss": 0.9112, "step": 60 }, { "epoch": 0.32, "learning_rate": 1.5853658536585366e-05, "loss": 0.8244, "step": 65 }, { "epoch": 0.34, "learning_rate": 1.707317073170732e-05, "loss": 0.7256, "step": 70 }, { "epoch": 0.36, "learning_rate": 1.8292682926829268e-05, "loss": 0.6848, "step": 75 }, { "epoch": 0.39, "learning_rate": 1.9512195121951222e-05, "loss": 0.6532, "step": 80 }, { "epoch": 0.41, "learning_rate": 2.073170731707317e-05, "loss": 0.5777, "step": 85 }, { "epoch": 0.44, "learning_rate": 2.1951219512195124e-05, "loss": 0.6014, "step": 90 }, { "epoch": 0.46, "learning_rate": 2.3170731707317075e-05, "loss": 0.5424, "step": 95 }, { "epoch": 0.49, "learning_rate": 2.4390243902439026e-05, "loss": 0.4876, "step": 100 }, { "epoch": 0.51, "learning_rate": 2.5609756097560977e-05, "loss": 0.4372, "step": 105 }, { "epoch": 0.54, "learning_rate": 2.682926829268293e-05, "loss": 0.4551, "step": 110 }, { "epoch": 0.56, "learning_rate": 2.8048780487804882e-05, "loss": 0.4751, "step": 115 }, { "epoch": 0.58, "learning_rate": 2.926829268292683e-05, "loss": 0.4652, "step": 120 }, { "epoch": 0.61, "learning_rate": 3.048780487804878e-05, "loss": 0.4264, "step": 125 }, { "epoch": 0.63, "learning_rate": 3.170731707317073e-05, "loss": 0.399, "step": 130 }, { "epoch": 0.66, "learning_rate": 3.292682926829269e-05, "loss": 0.4399, "step": 135 }, { "epoch": 0.68, "learning_rate": 3.414634146341464e-05, "loss": 0.4074, "step": 140 }, { "epoch": 0.71, "learning_rate": 3.5365853658536584e-05, "loss": 0.397, "step": 145 }, { "epoch": 0.73, "learning_rate": 3.6585365853658535e-05, "loss": 0.3578, "step": 150 }, { "epoch": 0.75, "learning_rate": 3.780487804878049e-05, "loss": 0.3771, "step": 155 }, { "epoch": 0.78, "learning_rate": 3.9024390243902444e-05, "loss": 0.366, "step": 160 }, { "epoch": 0.8, "learning_rate": 4.0243902439024395e-05, "loss": 0.3792, "step": 165 }, { "epoch": 0.83, "learning_rate": 4.146341463414634e-05, "loss": 0.3706, "step": 170 }, { "epoch": 0.85, "learning_rate": 4.26829268292683e-05, "loss": 0.352, "step": 175 }, { "epoch": 0.88, "learning_rate": 4.390243902439025e-05, "loss": 0.3481, "step": 180 }, { "epoch": 0.9, "learning_rate": 4.51219512195122e-05, "loss": 0.2698, "step": 185 }, { "epoch": 0.92, "learning_rate": 4.634146341463415e-05, "loss": 0.3573, "step": 190 }, { "epoch": 0.95, "learning_rate": 4.75609756097561e-05, "loss": 0.3479, "step": 195 }, { "epoch": 0.97, "learning_rate": 4.878048780487805e-05, "loss": 0.3917, "step": 200 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 0.337, "step": 205 }, { "epoch": 1.0, "eval_accuracy": 0.9175222450376455, "eval_loss": 0.21079373359680176, "eval_runtime": 21.0784, "eval_samples_per_second": 138.625, "eval_steps_per_second": 4.365, "step": 205 }, { "epoch": 1.02, "learning_rate": 4.986449864498645e-05, "loss": 0.3492, "step": 210 }, { "epoch": 1.05, "learning_rate": 4.97289972899729e-05, "loss": 0.3251, "step": 215 }, { "epoch": 1.07, "learning_rate": 4.959349593495935e-05, "loss": 0.2809, "step": 220 }, { "epoch": 1.09, "learning_rate": 4.9457994579945803e-05, "loss": 0.3412, "step": 225 }, { "epoch": 1.12, "learning_rate": 4.932249322493225e-05, "loss": 0.3049, "step": 230 }, { "epoch": 1.14, "learning_rate": 4.9186991869918704e-05, "loss": 0.3123, "step": 235 }, { "epoch": 1.17, "learning_rate": 4.905149051490515e-05, "loss": 0.2915, "step": 240 }, { "epoch": 1.19, "learning_rate": 4.89159891598916e-05, "loss": 0.2875, "step": 245 }, { "epoch": 1.22, "learning_rate": 4.878048780487805e-05, "loss": 0.2721, "step": 250 }, { "epoch": 1.24, "learning_rate": 4.86449864498645e-05, "loss": 0.2551, "step": 255 }, { "epoch": 1.27, "learning_rate": 4.8509485094850945e-05, "loss": 0.2956, "step": 260 }, { "epoch": 1.29, "learning_rate": 4.8373983739837406e-05, "loss": 0.2643, "step": 265 }, { "epoch": 1.31, "learning_rate": 4.823848238482385e-05, "loss": 0.3148, "step": 270 }, { "epoch": 1.34, "learning_rate": 4.81029810298103e-05, "loss": 0.2628, "step": 275 }, { "epoch": 1.36, "learning_rate": 4.796747967479675e-05, "loss": 0.2258, "step": 280 }, { "epoch": 1.39, "learning_rate": 4.78319783197832e-05, "loss": 0.2483, "step": 285 }, { "epoch": 1.41, "learning_rate": 4.769647696476965e-05, "loss": 0.2805, "step": 290 }, { "epoch": 1.44, "learning_rate": 4.75609756097561e-05, "loss": 0.2861, "step": 295 }, { "epoch": 1.46, "learning_rate": 4.7425474254742554e-05, "loss": 0.2972, "step": 300 }, { "epoch": 1.48, "learning_rate": 4.7289972899729e-05, "loss": 0.2432, "step": 305 }, { "epoch": 1.51, "learning_rate": 4.715447154471545e-05, "loss": 0.2517, "step": 310 }, { "epoch": 1.53, "learning_rate": 4.70189701897019e-05, "loss": 0.2707, "step": 315 }, { "epoch": 1.56, "learning_rate": 4.688346883468835e-05, "loss": 0.2512, "step": 320 }, { "epoch": 1.58, "learning_rate": 4.6747967479674795e-05, "loss": 0.2036, "step": 325 }, { "epoch": 1.61, "learning_rate": 4.661246612466125e-05, "loss": 0.2346, "step": 330 }, { "epoch": 1.63, "learning_rate": 4.6476964769647696e-05, "loss": 0.2921, "step": 335 }, { "epoch": 1.65, "learning_rate": 4.634146341463415e-05, "loss": 0.2789, "step": 340 }, { "epoch": 1.68, "learning_rate": 4.62059620596206e-05, "loss": 0.2456, "step": 345 }, { "epoch": 1.7, "learning_rate": 4.607046070460705e-05, "loss": 0.2536, "step": 350 }, { "epoch": 1.73, "learning_rate": 4.59349593495935e-05, "loss": 0.282, "step": 355 }, { "epoch": 1.75, "learning_rate": 4.579945799457995e-05, "loss": 0.2525, "step": 360 }, { "epoch": 1.78, "learning_rate": 4.56639566395664e-05, "loss": 0.2284, "step": 365 }, { "epoch": 1.8, "learning_rate": 4.5528455284552844e-05, "loss": 0.2389, "step": 370 }, { "epoch": 1.82, "learning_rate": 4.53929539295393e-05, "loss": 0.2425, "step": 375 }, { "epoch": 1.85, "learning_rate": 4.525745257452575e-05, "loss": 0.2051, "step": 380 }, { "epoch": 1.87, "learning_rate": 4.51219512195122e-05, "loss": 0.278, "step": 385 }, { "epoch": 1.9, "learning_rate": 4.4986449864498645e-05, "loss": 0.2004, "step": 390 }, { "epoch": 1.92, "learning_rate": 4.48509485094851e-05, "loss": 0.2467, "step": 395 }, { "epoch": 1.95, "learning_rate": 4.4715447154471546e-05, "loss": 0.2411, "step": 400 }, { "epoch": 1.97, "learning_rate": 4.457994579945799e-05, "loss": 0.218, "step": 405 }, { "epoch": 2.0, "learning_rate": 4.4444444444444447e-05, "loss": 0.196, "step": 410 }, { "epoch": 2.0, "eval_accuracy": 0.9620123203285421, "eval_loss": 0.11370620876550674, "eval_runtime": 21.3607, "eval_samples_per_second": 136.793, "eval_steps_per_second": 4.307, "step": 411 }, { "epoch": 2.02, "learning_rate": 4.43089430894309e-05, "loss": 0.1786, "step": 415 }, { "epoch": 2.04, "learning_rate": 4.417344173441735e-05, "loss": 0.2308, "step": 420 }, { "epoch": 2.07, "learning_rate": 4.4037940379403794e-05, "loss": 0.2158, "step": 425 }, { "epoch": 2.09, "learning_rate": 4.390243902439025e-05, "loss": 0.1875, "step": 430 }, { "epoch": 2.12, "learning_rate": 4.3766937669376695e-05, "loss": 0.1998, "step": 435 }, { "epoch": 2.14, "learning_rate": 4.363143631436314e-05, "loss": 0.2001, "step": 440 }, { "epoch": 2.17, "learning_rate": 4.3495934959349595e-05, "loss": 0.2349, "step": 445 }, { "epoch": 2.19, "learning_rate": 4.336043360433605e-05, "loss": 0.2191, "step": 450 }, { "epoch": 2.21, "learning_rate": 4.3224932249322496e-05, "loss": 0.2052, "step": 455 }, { "epoch": 2.24, "learning_rate": 4.308943089430895e-05, "loss": 0.1963, "step": 460 }, { "epoch": 2.26, "learning_rate": 4.2953929539295396e-05, "loss": 0.2318, "step": 465 }, { "epoch": 2.29, "learning_rate": 4.281842818428184e-05, "loss": 0.2105, "step": 470 }, { "epoch": 2.31, "learning_rate": 4.26829268292683e-05, "loss": 0.2185, "step": 475 }, { "epoch": 2.34, "learning_rate": 4.2547425474254744e-05, "loss": 0.2426, "step": 480 }, { "epoch": 2.36, "learning_rate": 4.241192411924119e-05, "loss": 0.2248, "step": 485 }, { "epoch": 2.38, "learning_rate": 4.2276422764227644e-05, "loss": 0.232, "step": 490 }, { "epoch": 2.41, "learning_rate": 4.21409214092141e-05, "loss": 0.2263, "step": 495 }, { "epoch": 2.43, "learning_rate": 4.2005420054200545e-05, "loss": 0.1929, "step": 500 }, { "epoch": 2.46, "learning_rate": 4.186991869918699e-05, "loss": 0.2151, "step": 505 }, { "epoch": 2.48, "learning_rate": 4.1734417344173445e-05, "loss": 0.2043, "step": 510 }, { "epoch": 2.51, "learning_rate": 4.159891598915989e-05, "loss": 0.1585, "step": 515 }, { "epoch": 2.53, "learning_rate": 4.146341463414634e-05, "loss": 0.2013, "step": 520 }, { "epoch": 2.55, "learning_rate": 4.132791327913279e-05, "loss": 0.2072, "step": 525 }, { "epoch": 2.58, "learning_rate": 4.1192411924119246e-05, "loss": 0.1806, "step": 530 }, { "epoch": 2.6, "learning_rate": 4.105691056910569e-05, "loss": 0.1539, "step": 535 }, { "epoch": 2.63, "learning_rate": 4.092140921409214e-05, "loss": 0.1582, "step": 540 }, { "epoch": 2.65, "learning_rate": 4.0785907859078594e-05, "loss": 0.1856, "step": 545 }, { "epoch": 2.68, "learning_rate": 4.065040650406504e-05, "loss": 0.1966, "step": 550 }, { "epoch": 2.7, "learning_rate": 4.051490514905149e-05, "loss": 0.205, "step": 555 }, { "epoch": 2.73, "learning_rate": 4.037940379403794e-05, "loss": 0.2359, "step": 560 }, { "epoch": 2.75, "learning_rate": 4.0243902439024395e-05, "loss": 0.1834, "step": 565 }, { "epoch": 2.77, "learning_rate": 4.010840108401084e-05, "loss": 0.2631, "step": 570 }, { "epoch": 2.8, "learning_rate": 3.9972899728997295e-05, "loss": 0.2, "step": 575 }, { "epoch": 2.82, "learning_rate": 3.983739837398374e-05, "loss": 0.1802, "step": 580 }, { "epoch": 2.85, "learning_rate": 3.970189701897019e-05, "loss": 0.2305, "step": 585 }, { "epoch": 2.87, "learning_rate": 3.956639566395664e-05, "loss": 0.1662, "step": 590 }, { "epoch": 2.9, "learning_rate": 3.943089430894309e-05, "loss": 0.166, "step": 595 }, { "epoch": 2.92, "learning_rate": 3.9295392953929537e-05, "loss": 0.167, "step": 600 }, { "epoch": 2.94, "learning_rate": 3.915989159891599e-05, "loss": 0.2017, "step": 605 }, { "epoch": 2.97, "learning_rate": 3.9024390243902444e-05, "loss": 0.1826, "step": 610 }, { "epoch": 2.99, "learning_rate": 3.888888888888889e-05, "loss": 0.1502, "step": 615 }, { "epoch": 3.0, "eval_accuracy": 0.9668035592060232, "eval_loss": 0.1030467301607132, "eval_runtime": 22.6678, "eval_samples_per_second": 128.905, "eval_steps_per_second": 4.059, "step": 616 }, { "epoch": 3.02, "learning_rate": 3.875338753387534e-05, "loss": 0.1764, "step": 620 }, { "epoch": 3.04, "learning_rate": 3.861788617886179e-05, "loss": 0.1602, "step": 625 }, { "epoch": 3.07, "learning_rate": 3.848238482384824e-05, "loss": 0.139, "step": 630 }, { "epoch": 3.09, "learning_rate": 3.8346883468834685e-05, "loss": 0.1811, "step": 635 }, { "epoch": 3.11, "learning_rate": 3.8211382113821145e-05, "loss": 0.1824, "step": 640 }, { "epoch": 3.14, "learning_rate": 3.807588075880759e-05, "loss": 0.1987, "step": 645 }, { "epoch": 3.16, "learning_rate": 3.794037940379404e-05, "loss": 0.1861, "step": 650 }, { "epoch": 3.19, "learning_rate": 3.780487804878049e-05, "loss": 0.2177, "step": 655 }, { "epoch": 3.21, "learning_rate": 3.766937669376694e-05, "loss": 0.1591, "step": 660 }, { "epoch": 3.24, "learning_rate": 3.753387533875339e-05, "loss": 0.2049, "step": 665 }, { "epoch": 3.26, "learning_rate": 3.739837398373984e-05, "loss": 0.1844, "step": 670 }, { "epoch": 3.28, "learning_rate": 3.726287262872629e-05, "loss": 0.1668, "step": 675 }, { "epoch": 3.31, "learning_rate": 3.712737127371274e-05, "loss": 0.1768, "step": 680 }, { "epoch": 3.33, "learning_rate": 3.699186991869919e-05, "loss": 0.1619, "step": 685 }, { "epoch": 3.36, "learning_rate": 3.685636856368564e-05, "loss": 0.2358, "step": 690 }, { "epoch": 3.38, "learning_rate": 3.672086720867209e-05, "loss": 0.1658, "step": 695 }, { "epoch": 3.41, "learning_rate": 3.6585365853658535e-05, "loss": 0.163, "step": 700 }, { "epoch": 3.43, "learning_rate": 3.644986449864499e-05, "loss": 0.1658, "step": 705 }, { "epoch": 3.45, "learning_rate": 3.6314363143631436e-05, "loss": 0.2312, "step": 710 }, { "epoch": 3.48, "learning_rate": 3.617886178861789e-05, "loss": 0.2043, "step": 715 }, { "epoch": 3.5, "learning_rate": 3.6043360433604336e-05, "loss": 0.1625, "step": 720 }, { "epoch": 3.53, "learning_rate": 3.590785907859079e-05, "loss": 0.1954, "step": 725 }, { "epoch": 3.55, "learning_rate": 3.577235772357724e-05, "loss": 0.1613, "step": 730 }, { "epoch": 3.58, "learning_rate": 3.5636856368563684e-05, "loss": 0.1924, "step": 735 }, { "epoch": 3.6, "learning_rate": 3.550135501355014e-05, "loss": 0.177, "step": 740 }, { "epoch": 3.63, "learning_rate": 3.5365853658536584e-05, "loss": 0.1707, "step": 745 }, { "epoch": 3.65, "learning_rate": 3.523035230352303e-05, "loss": 0.1454, "step": 750 }, { "epoch": 3.67, "learning_rate": 3.509485094850949e-05, "loss": 0.1731, "step": 755 }, { "epoch": 3.7, "learning_rate": 3.495934959349594e-05, "loss": 0.1361, "step": 760 }, { "epoch": 3.72, "learning_rate": 3.4823848238482385e-05, "loss": 0.1592, "step": 765 }, { "epoch": 3.75, "learning_rate": 3.468834688346884e-05, "loss": 0.1691, "step": 770 }, { "epoch": 3.77, "learning_rate": 3.4552845528455286e-05, "loss": 0.1321, "step": 775 }, { "epoch": 3.8, "learning_rate": 3.441734417344173e-05, "loss": 0.133, "step": 780 }, { "epoch": 3.82, "learning_rate": 3.4281842818428186e-05, "loss": 0.1367, "step": 785 }, { "epoch": 3.84, "learning_rate": 3.414634146341464e-05, "loss": 0.1517, "step": 790 }, { "epoch": 3.87, "learning_rate": 3.401084010840109e-05, "loss": 0.1478, "step": 795 }, { "epoch": 3.89, "learning_rate": 3.3875338753387534e-05, "loss": 0.1408, "step": 800 }, { "epoch": 3.92, "learning_rate": 3.373983739837399e-05, "loss": 0.2103, "step": 805 }, { "epoch": 3.94, "learning_rate": 3.3604336043360434e-05, "loss": 0.2157, "step": 810 }, { "epoch": 3.97, "learning_rate": 3.346883468834688e-05, "loss": 0.1526, "step": 815 }, { "epoch": 3.99, "learning_rate": 3.3333333333333335e-05, "loss": 0.1476, "step": 820 }, { "epoch": 4.0, "eval_accuracy": 0.9736481861738535, "eval_loss": 0.08152312785387039, "eval_runtime": 21.5748, "eval_samples_per_second": 135.436, "eval_steps_per_second": 4.264, "step": 822 }, { "epoch": 4.01, "learning_rate": 3.319783197831978e-05, "loss": 0.1679, "step": 825 }, { "epoch": 4.04, "learning_rate": 3.3062330623306235e-05, "loss": 0.1668, "step": 830 }, { "epoch": 4.06, "learning_rate": 3.292682926829269e-05, "loss": 0.1528, "step": 835 }, { "epoch": 4.09, "learning_rate": 3.2791327913279136e-05, "loss": 0.1984, "step": 840 }, { "epoch": 4.11, "learning_rate": 3.265582655826558e-05, "loss": 0.1575, "step": 845 }, { "epoch": 4.14, "learning_rate": 3.2520325203252037e-05, "loss": 0.156, "step": 850 }, { "epoch": 4.16, "learning_rate": 3.2384823848238483e-05, "loss": 0.1227, "step": 855 }, { "epoch": 4.18, "learning_rate": 3.224932249322493e-05, "loss": 0.1759, "step": 860 }, { "epoch": 4.21, "learning_rate": 3.2113821138211384e-05, "loss": 0.1261, "step": 865 }, { "epoch": 4.23, "learning_rate": 3.197831978319784e-05, "loss": 0.1566, "step": 870 }, { "epoch": 4.26, "learning_rate": 3.1842818428184285e-05, "loss": 0.1424, "step": 875 }, { "epoch": 4.28, "learning_rate": 3.170731707317073e-05, "loss": 0.1353, "step": 880 }, { "epoch": 4.31, "learning_rate": 3.1571815718157185e-05, "loss": 0.1595, "step": 885 }, { "epoch": 4.33, "learning_rate": 3.143631436314363e-05, "loss": 0.1823, "step": 890 }, { "epoch": 4.36, "learning_rate": 3.130081300813008e-05, "loss": 0.1428, "step": 895 }, { "epoch": 4.38, "learning_rate": 3.116531165311653e-05, "loss": 0.1566, "step": 900 }, { "epoch": 4.4, "learning_rate": 3.1029810298102986e-05, "loss": 0.099, "step": 905 }, { "epoch": 4.43, "learning_rate": 3.089430894308943e-05, "loss": 0.1311, "step": 910 }, { "epoch": 4.45, "learning_rate": 3.075880758807588e-05, "loss": 0.1317, "step": 915 }, { "epoch": 4.48, "learning_rate": 3.0623306233062334e-05, "loss": 0.1647, "step": 920 }, { "epoch": 4.5, "learning_rate": 3.048780487804878e-05, "loss": 0.1646, "step": 925 }, { "epoch": 4.53, "learning_rate": 3.035230352303523e-05, "loss": 0.1429, "step": 930 }, { "epoch": 4.55, "learning_rate": 3.021680216802168e-05, "loss": 0.1511, "step": 935 }, { "epoch": 4.57, "learning_rate": 3.0081300813008135e-05, "loss": 0.1691, "step": 940 }, { "epoch": 4.6, "learning_rate": 2.9945799457994585e-05, "loss": 0.1174, "step": 945 }, { "epoch": 4.62, "learning_rate": 2.9810298102981032e-05, "loss": 0.1743, "step": 950 }, { "epoch": 4.65, "learning_rate": 2.9674796747967482e-05, "loss": 0.1757, "step": 955 }, { "epoch": 4.67, "learning_rate": 2.9539295392953932e-05, "loss": 0.1164, "step": 960 }, { "epoch": 4.7, "learning_rate": 2.940379403794038e-05, "loss": 0.1469, "step": 965 }, { "epoch": 4.72, "learning_rate": 2.926829268292683e-05, "loss": 0.1539, "step": 970 }, { "epoch": 4.74, "learning_rate": 2.9132791327913276e-05, "loss": 0.1577, "step": 975 }, { "epoch": 4.77, "learning_rate": 2.8997289972899733e-05, "loss": 0.141, "step": 980 }, { "epoch": 4.79, "learning_rate": 2.886178861788618e-05, "loss": 0.1382, "step": 985 }, { "epoch": 4.82, "learning_rate": 2.872628726287263e-05, "loss": 0.1445, "step": 990 }, { "epoch": 4.84, "learning_rate": 2.859078590785908e-05, "loss": 0.179, "step": 995 }, { "epoch": 4.87, "learning_rate": 2.8455284552845528e-05, "loss": 0.1244, "step": 1000 }, { "epoch": 4.89, "learning_rate": 2.8319783197831978e-05, "loss": 0.172, "step": 1005 }, { "epoch": 4.91, "learning_rate": 2.8184281842818428e-05, "loss": 0.1341, "step": 1010 }, { "epoch": 4.94, "learning_rate": 2.8048780487804882e-05, "loss": 0.1456, "step": 1015 }, { "epoch": 4.96, "learning_rate": 2.7913279132791332e-05, "loss": 0.1839, "step": 1020 }, { "epoch": 4.99, "learning_rate": 2.777777777777778e-05, "loss": 0.1532, "step": 1025 }, { "epoch": 5.0, "eval_accuracy": 0.9760438056125941, "eval_loss": 0.08150195330381393, "eval_runtime": 21.7472, "eval_samples_per_second": 134.362, "eval_steps_per_second": 4.23, "step": 1027 }, { "epoch": 5.01, "learning_rate": 2.764227642276423e-05, "loss": 0.1426, "step": 1030 }, { "epoch": 5.04, "learning_rate": 2.750677506775068e-05, "loss": 0.1323, "step": 1035 }, { "epoch": 5.06, "learning_rate": 2.7371273712737127e-05, "loss": 0.101, "step": 1040 }, { "epoch": 5.09, "learning_rate": 2.7235772357723577e-05, "loss": 0.1545, "step": 1045 }, { "epoch": 5.11, "learning_rate": 2.7100271002710027e-05, "loss": 0.1368, "step": 1050 }, { "epoch": 5.13, "learning_rate": 2.696476964769648e-05, "loss": 0.1524, "step": 1055 }, { "epoch": 5.16, "learning_rate": 2.682926829268293e-05, "loss": 0.1483, "step": 1060 }, { "epoch": 5.18, "learning_rate": 2.6693766937669378e-05, "loss": 0.1225, "step": 1065 }, { "epoch": 5.21, "learning_rate": 2.6558265582655828e-05, "loss": 0.1314, "step": 1070 }, { "epoch": 5.23, "learning_rate": 2.642276422764228e-05, "loss": 0.1565, "step": 1075 }, { "epoch": 5.26, "learning_rate": 2.6287262872628725e-05, "loss": 0.1479, "step": 1080 }, { "epoch": 5.28, "learning_rate": 2.6151761517615176e-05, "loss": 0.1391, "step": 1085 }, { "epoch": 5.3, "learning_rate": 2.601626016260163e-05, "loss": 0.1049, "step": 1090 }, { "epoch": 5.33, "learning_rate": 2.588075880758808e-05, "loss": 0.156, "step": 1095 }, { "epoch": 5.35, "learning_rate": 2.574525745257453e-05, "loss": 0.1518, "step": 1100 }, { "epoch": 5.38, "learning_rate": 2.5609756097560977e-05, "loss": 0.1462, "step": 1105 }, { "epoch": 5.4, "learning_rate": 2.5474254742547427e-05, "loss": 0.1438, "step": 1110 }, { "epoch": 5.43, "learning_rate": 2.5338753387533877e-05, "loss": 0.1274, "step": 1115 }, { "epoch": 5.45, "learning_rate": 2.5203252032520324e-05, "loss": 0.1378, "step": 1120 }, { "epoch": 5.47, "learning_rate": 2.5067750677506774e-05, "loss": 0.1273, "step": 1125 }, { "epoch": 5.5, "learning_rate": 2.4932249322493225e-05, "loss": 0.1386, "step": 1130 }, { "epoch": 5.52, "learning_rate": 2.4796747967479675e-05, "loss": 0.1615, "step": 1135 }, { "epoch": 5.55, "learning_rate": 2.4661246612466125e-05, "loss": 0.1052, "step": 1140 }, { "epoch": 5.57, "learning_rate": 2.4525745257452575e-05, "loss": 0.1767, "step": 1145 }, { "epoch": 5.6, "learning_rate": 2.4390243902439026e-05, "loss": 0.1354, "step": 1150 }, { "epoch": 5.62, "learning_rate": 2.4254742547425473e-05, "loss": 0.1458, "step": 1155 }, { "epoch": 5.64, "learning_rate": 2.4119241192411926e-05, "loss": 0.1144, "step": 1160 }, { "epoch": 5.67, "learning_rate": 2.3983739837398377e-05, "loss": 0.127, "step": 1165 }, { "epoch": 5.69, "learning_rate": 2.3848238482384823e-05, "loss": 0.1103, "step": 1170 }, { "epoch": 5.72, "learning_rate": 2.3712737127371277e-05, "loss": 0.1934, "step": 1175 }, { "epoch": 5.74, "learning_rate": 2.3577235772357724e-05, "loss": 0.1208, "step": 1180 }, { "epoch": 5.77, "learning_rate": 2.3441734417344174e-05, "loss": 0.1286, "step": 1185 }, { "epoch": 5.79, "learning_rate": 2.3306233062330625e-05, "loss": 0.1495, "step": 1190 }, { "epoch": 5.82, "learning_rate": 2.3170731707317075e-05, "loss": 0.1243, "step": 1195 }, { "epoch": 5.84, "learning_rate": 2.3035230352303525e-05, "loss": 0.1316, "step": 1200 }, { "epoch": 5.86, "learning_rate": 2.2899728997289975e-05, "loss": 0.1777, "step": 1205 }, { "epoch": 5.89, "learning_rate": 2.2764227642276422e-05, "loss": 0.1363, "step": 1210 }, { "epoch": 5.91, "learning_rate": 2.2628726287262876e-05, "loss": 0.1467, "step": 1215 }, { "epoch": 5.94, "learning_rate": 2.2493224932249323e-05, "loss": 0.1484, "step": 1220 }, { "epoch": 5.96, "learning_rate": 2.2357723577235773e-05, "loss": 0.1041, "step": 1225 }, { "epoch": 5.99, "learning_rate": 2.2222222222222223e-05, "loss": 0.1311, "step": 1230 }, { "epoch": 6.0, "eval_accuracy": 0.9804928131416838, "eval_loss": 0.06673090904951096, "eval_runtime": 21.2329, "eval_samples_per_second": 137.616, "eval_steps_per_second": 4.333, "step": 1233 }, { "epoch": 6.01, "learning_rate": 2.2086720867208674e-05, "loss": 0.1585, "step": 1235 }, { "epoch": 6.03, "learning_rate": 2.1951219512195124e-05, "loss": 0.0896, "step": 1240 }, { "epoch": 6.06, "learning_rate": 2.181571815718157e-05, "loss": 0.1285, "step": 1245 }, { "epoch": 6.08, "learning_rate": 2.1680216802168024e-05, "loss": 0.1674, "step": 1250 }, { "epoch": 6.11, "learning_rate": 2.1544715447154475e-05, "loss": 0.129, "step": 1255 }, { "epoch": 6.13, "learning_rate": 2.140921409214092e-05, "loss": 0.1062, "step": 1260 }, { "epoch": 6.16, "learning_rate": 2.1273712737127372e-05, "loss": 0.1407, "step": 1265 }, { "epoch": 6.18, "learning_rate": 2.1138211382113822e-05, "loss": 0.1303, "step": 1270 }, { "epoch": 6.2, "learning_rate": 2.1002710027100272e-05, "loss": 0.125, "step": 1275 }, { "epoch": 6.23, "learning_rate": 2.0867208672086723e-05, "loss": 0.1012, "step": 1280 }, { "epoch": 6.25, "learning_rate": 2.073170731707317e-05, "loss": 0.1349, "step": 1285 }, { "epoch": 6.28, "learning_rate": 2.0596205962059623e-05, "loss": 0.1091, "step": 1290 }, { "epoch": 6.3, "learning_rate": 2.046070460704607e-05, "loss": 0.1129, "step": 1295 }, { "epoch": 6.33, "learning_rate": 2.032520325203252e-05, "loss": 0.1401, "step": 1300 }, { "epoch": 6.35, "learning_rate": 2.018970189701897e-05, "loss": 0.1221, "step": 1305 }, { "epoch": 6.37, "learning_rate": 2.005420054200542e-05, "loss": 0.0942, "step": 1310 }, { "epoch": 6.4, "learning_rate": 1.991869918699187e-05, "loss": 0.1388, "step": 1315 }, { "epoch": 6.42, "learning_rate": 1.978319783197832e-05, "loss": 0.1218, "step": 1320 }, { "epoch": 6.45, "learning_rate": 1.9647696476964768e-05, "loss": 0.1153, "step": 1325 }, { "epoch": 6.47, "learning_rate": 1.9512195121951222e-05, "loss": 0.152, "step": 1330 }, { "epoch": 6.5, "learning_rate": 1.937669376693767e-05, "loss": 0.1268, "step": 1335 }, { "epoch": 6.52, "learning_rate": 1.924119241192412e-05, "loss": 0.1328, "step": 1340 }, { "epoch": 6.55, "learning_rate": 1.9105691056910573e-05, "loss": 0.1374, "step": 1345 }, { "epoch": 6.57, "learning_rate": 1.897018970189702e-05, "loss": 0.1316, "step": 1350 }, { "epoch": 6.59, "learning_rate": 1.883468834688347e-05, "loss": 0.1332, "step": 1355 }, { "epoch": 6.62, "learning_rate": 1.869918699186992e-05, "loss": 0.1308, "step": 1360 }, { "epoch": 6.64, "learning_rate": 1.856368563685637e-05, "loss": 0.1097, "step": 1365 }, { "epoch": 6.67, "learning_rate": 1.842818428184282e-05, "loss": 0.1391, "step": 1370 }, { "epoch": 6.69, "learning_rate": 1.8292682926829268e-05, "loss": 0.1464, "step": 1375 }, { "epoch": 6.72, "learning_rate": 1.8157181571815718e-05, "loss": 0.1092, "step": 1380 }, { "epoch": 6.74, "learning_rate": 1.8021680216802168e-05, "loss": 0.1048, "step": 1385 }, { "epoch": 6.76, "learning_rate": 1.788617886178862e-05, "loss": 0.1089, "step": 1390 }, { "epoch": 6.79, "learning_rate": 1.775067750677507e-05, "loss": 0.1597, "step": 1395 }, { "epoch": 6.81, "learning_rate": 1.7615176151761516e-05, "loss": 0.1062, "step": 1400 }, { "epoch": 6.84, "learning_rate": 1.747967479674797e-05, "loss": 0.1317, "step": 1405 }, { "epoch": 6.86, "learning_rate": 1.734417344173442e-05, "loss": 0.083, "step": 1410 }, { "epoch": 6.89, "learning_rate": 1.7208672086720866e-05, "loss": 0.1756, "step": 1415 }, { "epoch": 6.91, "learning_rate": 1.707317073170732e-05, "loss": 0.1093, "step": 1420 }, { "epoch": 6.93, "learning_rate": 1.6937669376693767e-05, "loss": 0.1497, "step": 1425 }, { "epoch": 6.96, "learning_rate": 1.6802168021680217e-05, "loss": 0.1338, "step": 1430 }, { "epoch": 6.98, "learning_rate": 1.6666666666666667e-05, "loss": 0.1212, "step": 1435 }, { "epoch": 7.0, "eval_accuracy": 0.9804928131416838, "eval_loss": 0.0675366148352623, "eval_runtime": 21.3826, "eval_samples_per_second": 136.653, "eval_steps_per_second": 4.303, "step": 1438 }, { "epoch": 7.01, "learning_rate": 1.6531165311653118e-05, "loss": 0.1285, "step": 1440 }, { "epoch": 7.03, "learning_rate": 1.6395663956639568e-05, "loss": 0.1137, "step": 1445 }, { "epoch": 7.06, "learning_rate": 1.6260162601626018e-05, "loss": 0.1065, "step": 1450 }, { "epoch": 7.08, "learning_rate": 1.6124661246612465e-05, "loss": 0.1344, "step": 1455 }, { "epoch": 7.1, "learning_rate": 1.598915989159892e-05, "loss": 0.1343, "step": 1460 }, { "epoch": 7.13, "learning_rate": 1.5853658536585366e-05, "loss": 0.1364, "step": 1465 }, { "epoch": 7.15, "learning_rate": 1.5718157181571816e-05, "loss": 0.0916, "step": 1470 }, { "epoch": 7.18, "learning_rate": 1.5582655826558266e-05, "loss": 0.1183, "step": 1475 }, { "epoch": 7.2, "learning_rate": 1.5447154471544717e-05, "loss": 0.0924, "step": 1480 }, { "epoch": 7.23, "learning_rate": 1.5311653116531167e-05, "loss": 0.0979, "step": 1485 }, { "epoch": 7.25, "learning_rate": 1.5176151761517615e-05, "loss": 0.1393, "step": 1490 }, { "epoch": 7.27, "learning_rate": 1.5040650406504067e-05, "loss": 0.1332, "step": 1495 }, { "epoch": 7.3, "learning_rate": 1.4905149051490516e-05, "loss": 0.1017, "step": 1500 }, { "epoch": 7.32, "learning_rate": 1.4769647696476966e-05, "loss": 0.1124, "step": 1505 }, { "epoch": 7.35, "learning_rate": 1.4634146341463415e-05, "loss": 0.133, "step": 1510 }, { "epoch": 7.37, "learning_rate": 1.4498644986449867e-05, "loss": 0.1389, "step": 1515 }, { "epoch": 7.4, "learning_rate": 1.4363143631436315e-05, "loss": 0.1296, "step": 1520 }, { "epoch": 7.42, "learning_rate": 1.4227642276422764e-05, "loss": 0.144, "step": 1525 }, { "epoch": 7.45, "learning_rate": 1.4092140921409214e-05, "loss": 0.1104, "step": 1530 }, { "epoch": 7.47, "learning_rate": 1.3956639566395666e-05, "loss": 0.1089, "step": 1535 }, { "epoch": 7.49, "learning_rate": 1.3821138211382115e-05, "loss": 0.1098, "step": 1540 }, { "epoch": 7.52, "learning_rate": 1.3685636856368563e-05, "loss": 0.1404, "step": 1545 }, { "epoch": 7.54, "learning_rate": 1.3550135501355014e-05, "loss": 0.121, "step": 1550 }, { "epoch": 7.57, "learning_rate": 1.3414634146341466e-05, "loss": 0.1223, "step": 1555 }, { "epoch": 7.59, "learning_rate": 1.3279132791327914e-05, "loss": 0.1421, "step": 1560 }, { "epoch": 7.62, "learning_rate": 1.3143631436314363e-05, "loss": 0.1299, "step": 1565 }, { "epoch": 7.64, "learning_rate": 1.3008130081300815e-05, "loss": 0.1048, "step": 1570 }, { "epoch": 7.66, "learning_rate": 1.2872628726287265e-05, "loss": 0.0924, "step": 1575 }, { "epoch": 7.69, "learning_rate": 1.2737127371273713e-05, "loss": 0.1248, "step": 1580 }, { "epoch": 7.71, "learning_rate": 1.2601626016260162e-05, "loss": 0.1488, "step": 1585 }, { "epoch": 7.74, "learning_rate": 1.2466124661246612e-05, "loss": 0.1429, "step": 1590 }, { "epoch": 7.76, "learning_rate": 1.2330623306233063e-05, "loss": 0.1318, "step": 1595 }, { "epoch": 7.79, "learning_rate": 1.2195121951219513e-05, "loss": 0.1389, "step": 1600 }, { "epoch": 7.81, "learning_rate": 1.2059620596205963e-05, "loss": 0.0929, "step": 1605 }, { "epoch": 7.83, "learning_rate": 1.1924119241192412e-05, "loss": 0.0922, "step": 1610 }, { "epoch": 7.86, "learning_rate": 1.1788617886178862e-05, "loss": 0.0889, "step": 1615 }, { "epoch": 7.88, "learning_rate": 1.1653116531165312e-05, "loss": 0.1047, "step": 1620 }, { "epoch": 7.91, "learning_rate": 1.1517615176151763e-05, "loss": 0.1099, "step": 1625 }, { "epoch": 7.93, "learning_rate": 1.1382113821138211e-05, "loss": 0.1038, "step": 1630 }, { "epoch": 7.96, "learning_rate": 1.1246612466124661e-05, "loss": 0.0829, "step": 1635 }, { "epoch": 7.98, "learning_rate": 1.1111111111111112e-05, "loss": 0.1637, "step": 1640 }, { "epoch": 8.0, "eval_accuracy": 0.9798083504449008, "eval_loss": 0.06967572867870331, "eval_runtime": 21.6361, "eval_samples_per_second": 135.052, "eval_steps_per_second": 4.252, "step": 1644 }, { "epoch": 8.0, "learning_rate": 1.0975609756097562e-05, "loss": 0.0999, "step": 1645 }, { "epoch": 8.03, "learning_rate": 1.0840108401084012e-05, "loss": 0.1016, "step": 1650 }, { "epoch": 8.05, "learning_rate": 1.070460704607046e-05, "loss": 0.1218, "step": 1655 }, { "epoch": 8.08, "learning_rate": 1.0569105691056911e-05, "loss": 0.1293, "step": 1660 }, { "epoch": 8.1, "learning_rate": 1.0433604336043361e-05, "loss": 0.1151, "step": 1665 }, { "epoch": 8.13, "learning_rate": 1.0298102981029812e-05, "loss": 0.1309, "step": 1670 }, { "epoch": 8.15, "learning_rate": 1.016260162601626e-05, "loss": 0.12, "step": 1675 }, { "epoch": 8.18, "learning_rate": 1.002710027100271e-05, "loss": 0.1278, "step": 1680 }, { "epoch": 8.2, "learning_rate": 9.89159891598916e-06, "loss": 0.0949, "step": 1685 }, { "epoch": 8.22, "learning_rate": 9.756097560975611e-06, "loss": 0.108, "step": 1690 }, { "epoch": 8.25, "learning_rate": 9.62059620596206e-06, "loss": 0.1144, "step": 1695 }, { "epoch": 8.27, "learning_rate": 9.48509485094851e-06, "loss": 0.1016, "step": 1700 }, { "epoch": 8.3, "learning_rate": 9.34959349593496e-06, "loss": 0.0884, "step": 1705 }, { "epoch": 8.32, "learning_rate": 9.21409214092141e-06, "loss": 0.093, "step": 1710 }, { "epoch": 8.35, "learning_rate": 9.078590785907859e-06, "loss": 0.0766, "step": 1715 }, { "epoch": 8.37, "learning_rate": 8.94308943089431e-06, "loss": 0.1495, "step": 1720 }, { "epoch": 8.39, "learning_rate": 8.807588075880758e-06, "loss": 0.1037, "step": 1725 }, { "epoch": 8.42, "learning_rate": 8.67208672086721e-06, "loss": 0.1294, "step": 1730 }, { "epoch": 8.44, "learning_rate": 8.53658536585366e-06, "loss": 0.0902, "step": 1735 }, { "epoch": 8.47, "learning_rate": 8.401084010840109e-06, "loss": 0.0931, "step": 1740 }, { "epoch": 8.49, "learning_rate": 8.265582655826559e-06, "loss": 0.108, "step": 1745 }, { "epoch": 8.52, "learning_rate": 8.130081300813009e-06, "loss": 0.0974, "step": 1750 }, { "epoch": 8.54, "learning_rate": 7.99457994579946e-06, "loss": 0.092, "step": 1755 }, { "epoch": 8.56, "learning_rate": 7.859078590785908e-06, "loss": 0.095, "step": 1760 }, { "epoch": 8.59, "learning_rate": 7.723577235772358e-06, "loss": 0.1177, "step": 1765 }, { "epoch": 8.61, "learning_rate": 7.588075880758808e-06, "loss": 0.1242, "step": 1770 }, { "epoch": 8.64, "learning_rate": 7.452574525745258e-06, "loss": 0.1228, "step": 1775 }, { "epoch": 8.66, "learning_rate": 7.317073170731707e-06, "loss": 0.1003, "step": 1780 }, { "epoch": 8.69, "learning_rate": 7.181571815718158e-06, "loss": 0.1152, "step": 1785 }, { "epoch": 8.71, "learning_rate": 7.046070460704607e-06, "loss": 0.0998, "step": 1790 }, { "epoch": 8.73, "learning_rate": 6.910569105691057e-06, "loss": 0.0974, "step": 1795 }, { "epoch": 8.76, "learning_rate": 6.775067750677507e-06, "loss": 0.0862, "step": 1800 }, { "epoch": 8.78, "learning_rate": 6.639566395663957e-06, "loss": 0.134, "step": 1805 }, { "epoch": 8.81, "learning_rate": 6.504065040650407e-06, "loss": 0.1038, "step": 1810 }, { "epoch": 8.83, "learning_rate": 6.368563685636857e-06, "loss": 0.1416, "step": 1815 }, { "epoch": 8.86, "learning_rate": 6.233062330623306e-06, "loss": 0.1246, "step": 1820 }, { "epoch": 8.88, "learning_rate": 6.0975609756097564e-06, "loss": 0.0954, "step": 1825 }, { "epoch": 8.91, "learning_rate": 5.962059620596206e-06, "loss": 0.1269, "step": 1830 }, { "epoch": 8.93, "learning_rate": 5.826558265582656e-06, "loss": 0.0913, "step": 1835 }, { "epoch": 8.95, "learning_rate": 5.6910569105691056e-06, "loss": 0.1073, "step": 1840 }, { "epoch": 8.98, "learning_rate": 5.555555555555556e-06, "loss": 0.116, "step": 1845 }, { "epoch": 9.0, "eval_accuracy": 0.9811772758384668, "eval_loss": 0.06383541971445084, "eval_runtime": 21.8412, "eval_samples_per_second": 133.784, "eval_steps_per_second": 4.212, "step": 1849 }, { "epoch": 9.0, "learning_rate": 5.420054200542006e-06, "loss": 0.0959, "step": 1850 }, { "epoch": 9.03, "learning_rate": 5.2845528455284555e-06, "loss": 0.1091, "step": 1855 }, { "epoch": 9.05, "learning_rate": 5.149051490514906e-06, "loss": 0.0918, "step": 1860 }, { "epoch": 9.08, "learning_rate": 5.013550135501355e-06, "loss": 0.0973, "step": 1865 }, { "epoch": 9.1, "learning_rate": 4.8780487804878055e-06, "loss": 0.0981, "step": 1870 }, { "epoch": 9.12, "learning_rate": 4.742547425474255e-06, "loss": 0.0797, "step": 1875 }, { "epoch": 9.15, "learning_rate": 4.607046070460705e-06, "loss": 0.1161, "step": 1880 }, { "epoch": 9.17, "learning_rate": 4.471544715447155e-06, "loss": 0.1062, "step": 1885 }, { "epoch": 9.2, "learning_rate": 4.336043360433605e-06, "loss": 0.0973, "step": 1890 }, { "epoch": 9.22, "learning_rate": 4.200542005420054e-06, "loss": 0.0964, "step": 1895 }, { "epoch": 9.25, "learning_rate": 4.0650406504065046e-06, "loss": 0.0953, "step": 1900 }, { "epoch": 9.27, "learning_rate": 3.929539295392954e-06, "loss": 0.0841, "step": 1905 }, { "epoch": 9.29, "learning_rate": 3.794037940379404e-06, "loss": 0.1088, "step": 1910 }, { "epoch": 9.32, "learning_rate": 3.6585365853658537e-06, "loss": 0.1275, "step": 1915 }, { "epoch": 9.34, "learning_rate": 3.5230352303523035e-06, "loss": 0.1055, "step": 1920 }, { "epoch": 9.37, "learning_rate": 3.3875338753387534e-06, "loss": 0.1103, "step": 1925 }, { "epoch": 9.39, "learning_rate": 3.2520325203252037e-06, "loss": 0.0943, "step": 1930 }, { "epoch": 9.42, "learning_rate": 3.116531165311653e-06, "loss": 0.1117, "step": 1935 }, { "epoch": 9.44, "learning_rate": 2.981029810298103e-06, "loss": 0.0888, "step": 1940 }, { "epoch": 9.46, "learning_rate": 2.8455284552845528e-06, "loss": 0.1169, "step": 1945 }, { "epoch": 9.49, "learning_rate": 2.710027100271003e-06, "loss": 0.0857, "step": 1950 }, { "epoch": 9.51, "learning_rate": 2.574525745257453e-06, "loss": 0.0776, "step": 1955 }, { "epoch": 9.54, "learning_rate": 2.4390243902439027e-06, "loss": 0.0843, "step": 1960 }, { "epoch": 9.56, "learning_rate": 2.3035230352303526e-06, "loss": 0.0758, "step": 1965 }, { "epoch": 9.59, "learning_rate": 2.1680216802168024e-06, "loss": 0.1109, "step": 1970 }, { "epoch": 9.61, "learning_rate": 2.0325203252032523e-06, "loss": 0.1277, "step": 1975 }, { "epoch": 9.64, "learning_rate": 1.897018970189702e-06, "loss": 0.1218, "step": 1980 }, { "epoch": 9.66, "learning_rate": 1.7615176151761518e-06, "loss": 0.1389, "step": 1985 }, { "epoch": 9.68, "learning_rate": 1.6260162601626018e-06, "loss": 0.1061, "step": 1990 }, { "epoch": 9.71, "learning_rate": 1.4905149051490515e-06, "loss": 0.1006, "step": 1995 }, { "epoch": 9.73, "learning_rate": 1.3550135501355015e-06, "loss": 0.116, "step": 2000 }, { "epoch": 9.76, "learning_rate": 1.2195121951219514e-06, "loss": 0.1143, "step": 2005 }, { "epoch": 9.78, "learning_rate": 1.0840108401084012e-06, "loss": 0.1214, "step": 2010 }, { "epoch": 9.81, "learning_rate": 9.48509485094851e-07, "loss": 0.1353, "step": 2015 }, { "epoch": 9.83, "learning_rate": 8.130081300813009e-07, "loss": 0.1078, "step": 2020 }, { "epoch": 9.85, "learning_rate": 6.775067750677508e-07, "loss": 0.0809, "step": 2025 }, { "epoch": 9.88, "learning_rate": 5.420054200542006e-07, "loss": 0.1068, "step": 2030 }, { "epoch": 9.9, "learning_rate": 4.0650406504065046e-07, "loss": 0.0909, "step": 2035 }, { "epoch": 9.93, "learning_rate": 2.710027100271003e-07, "loss": 0.0828, "step": 2040 }, { "epoch": 9.95, "learning_rate": 1.3550135501355015e-07, "loss": 0.1054, "step": 2045 }, { "epoch": 9.98, "learning_rate": 0.0, "loss": 0.085, "step": 2050 }, { "epoch": 9.98, "eval_accuracy": 0.9798083504449008, "eval_loss": 0.06385670602321625, "eval_runtime": 21.9428, "eval_samples_per_second": 133.164, "eval_steps_per_second": 4.193, "step": 2050 }, { "epoch": 9.98, "step": 2050, "total_flos": 6.521150663842333e+18, "train_loss": 0.21471095208714647, "train_runtime": 4563.3157, "train_samples_per_second": 57.625, "train_steps_per_second": 0.449 } ], "logging_steps": 5, "max_steps": 2050, "num_train_epochs": 10, "save_steps": 500, "total_flos": 6.521150663842333e+18, "trial_name": null, "trial_params": null }