{ "best_metric": 0.13810041546821594, "best_model_checkpoint": "frost-mobile-apple__mobilevit-xx-small-v2024-10-22/checkpoint-1500", "epoch": 30.0, "eval_steps": 100, "global_step": 1710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17543859649122806, "grad_norm": 1.4629484415054321, "learning_rate": 1.1695906432748537e-05, "loss": 0.1499, "step": 10 }, { "epoch": 0.3508771929824561, "grad_norm": 1.593600869178772, "learning_rate": 2.2222222222222223e-05, "loss": 0.1837, "step": 20 }, { "epoch": 0.5263157894736842, "grad_norm": 0.6477800607681274, "learning_rate": 3.391812865497076e-05, "loss": 0.1569, "step": 30 }, { "epoch": 0.7017543859649122, "grad_norm": 1.953596591949463, "learning_rate": 4.56140350877193e-05, "loss": 0.1976, "step": 40 }, { "epoch": 0.8771929824561403, "grad_norm": 3.2343287467956543, "learning_rate": 5.7309941520467835e-05, "loss": 0.1688, "step": 50 }, { "epoch": 1.0526315789473684, "grad_norm": 2.1099464893341064, "learning_rate": 6.900584795321637e-05, "loss": 0.2217, "step": 60 }, { "epoch": 1.2280701754385965, "grad_norm": 0.6702929139137268, "learning_rate": 8.070175438596491e-05, "loss": 0.178, "step": 70 }, { "epoch": 1.4035087719298245, "grad_norm": 1.2941429615020752, "learning_rate": 9.239766081871345e-05, "loss": 0.1785, "step": 80 }, { "epoch": 1.5789473684210527, "grad_norm": 0.7907508015632629, "learning_rate": 0.000104093567251462, "loss": 0.1734, "step": 90 }, { "epoch": 1.7543859649122808, "grad_norm": 2.8165764808654785, "learning_rate": 0.00011578947368421053, "loss": 0.1927, "step": 100 }, { "epoch": 1.7543859649122808, "eval_accuracy": 0.9422222222222222, "eval_f1": 0.8565121412803532, "eval_loss": 0.14700087904930115, "eval_precision": 0.8565121412803532, "eval_recall": 0.8565121412803532, "eval_runtime": 1.8444, "eval_samples_per_second": 121.989, "eval_steps_per_second": 15.723, "step": 100 }, { "epoch": 1.9298245614035088, "grad_norm": 1.6650861501693726, "learning_rate": 0.00012748538011695908, "loss": 0.1955, "step": 110 }, { "epoch": 2.1052631578947367, "grad_norm": 2.2151546478271484, "learning_rate": 0.00013918128654970762, "loss": 0.1822, "step": 120 }, { "epoch": 2.280701754385965, "grad_norm": 2.724104642868042, "learning_rate": 0.00015087719298245616, "loss": 0.1756, "step": 130 }, { "epoch": 2.456140350877193, "grad_norm": 2.150078296661377, "learning_rate": 0.0001625730994152047, "loss": 0.2256, "step": 140 }, { "epoch": 2.6315789473684212, "grad_norm": 1.6141526699066162, "learning_rate": 0.00017426900584795323, "loss": 0.2133, "step": 150 }, { "epoch": 2.807017543859649, "grad_norm": 0.5151457190513611, "learning_rate": 0.00018596491228070177, "loss": 0.1651, "step": 160 }, { "epoch": 2.982456140350877, "grad_norm": 0.7054281830787659, "learning_rate": 0.0001976608187134503, "loss": 0.1798, "step": 170 }, { "epoch": 3.1578947368421053, "grad_norm": 1.5373905897140503, "learning_rate": 0.0001989603638726446, "loss": 0.2036, "step": 180 }, { "epoch": 3.3333333333333335, "grad_norm": 2.1035780906677246, "learning_rate": 0.0001977907732293697, "loss": 0.1978, "step": 190 }, { "epoch": 3.5087719298245617, "grad_norm": 4.142488956451416, "learning_rate": 0.00019649122807017543, "loss": 0.1601, "step": 200 }, { "epoch": 3.5087719298245617, "eval_accuracy": 0.9444444444444444, "eval_f1": 0.8615725359911407, "eval_loss": 0.14985999464988708, "eval_precision": 0.8644444444444445, "eval_recall": 0.8587196467991169, "eval_runtime": 1.8101, "eval_samples_per_second": 124.3, "eval_steps_per_second": 16.021, "step": 200 }, { "epoch": 3.6842105263157894, "grad_norm": 2.600123167037964, "learning_rate": 0.00019519168291098115, "loss": 0.1839, "step": 210 }, { "epoch": 3.8596491228070176, "grad_norm": 0.9239832758903503, "learning_rate": 0.00019389213775178687, "loss": 0.1777, "step": 220 }, { "epoch": 4.035087719298246, "grad_norm": 1.9398410320281982, "learning_rate": 0.0001925925925925926, "loss": 0.1901, "step": 230 }, { "epoch": 4.2105263157894735, "grad_norm": 0.5486197471618652, "learning_rate": 0.00019129304743339831, "loss": 0.2089, "step": 240 }, { "epoch": 4.385964912280702, "grad_norm": 1.0370583534240723, "learning_rate": 0.00018999350227420404, "loss": 0.1968, "step": 250 }, { "epoch": 4.56140350877193, "grad_norm": 0.6489719748497009, "learning_rate": 0.00018869395711500976, "loss": 0.1894, "step": 260 }, { "epoch": 4.7368421052631575, "grad_norm": 1.9817498922348022, "learning_rate": 0.00018739441195581545, "loss": 0.1837, "step": 270 }, { "epoch": 4.912280701754386, "grad_norm": 1.9093741178512573, "learning_rate": 0.00018609486679662117, "loss": 0.1757, "step": 280 }, { "epoch": 5.087719298245614, "grad_norm": 3.8483004570007324, "learning_rate": 0.0001847953216374269, "loss": 0.2078, "step": 290 }, { "epoch": 5.2631578947368425, "grad_norm": 1.245739221572876, "learning_rate": 0.0001834957764782326, "loss": 0.1544, "step": 300 }, { "epoch": 5.2631578947368425, "eval_accuracy": 0.9391111111111111, "eval_f1": 0.8492849284928493, "eval_loss": 0.15355795621871948, "eval_precision": 0.8464912280701754, "eval_recall": 0.8520971302428256, "eval_runtime": 1.9338, "eval_samples_per_second": 116.35, "eval_steps_per_second": 14.996, "step": 300 }, { "epoch": 5.43859649122807, "grad_norm": 1.5718433856964111, "learning_rate": 0.00018219623131903833, "loss": 0.1846, "step": 310 }, { "epoch": 5.614035087719298, "grad_norm": 1.846637487411499, "learning_rate": 0.00018089668615984406, "loss": 0.1894, "step": 320 }, { "epoch": 5.7894736842105265, "grad_norm": 1.1481417417526245, "learning_rate": 0.00017959714100064978, "loss": 0.1789, "step": 330 }, { "epoch": 5.964912280701754, "grad_norm": 8.032550811767578, "learning_rate": 0.0001782975958414555, "loss": 0.1771, "step": 340 }, { "epoch": 6.140350877192983, "grad_norm": 1.2688547372817993, "learning_rate": 0.00017699805068226122, "loss": 0.1634, "step": 350 }, { "epoch": 6.315789473684211, "grad_norm": 2.1827070713043213, "learning_rate": 0.0001756985055230669, "loss": 0.1738, "step": 360 }, { "epoch": 6.491228070175438, "grad_norm": 1.214490294456482, "learning_rate": 0.00017439896036387263, "loss": 0.1829, "step": 370 }, { "epoch": 6.666666666666667, "grad_norm": 1.8781157732009888, "learning_rate": 0.00017309941520467836, "loss": 0.171, "step": 380 }, { "epoch": 6.842105263157895, "grad_norm": 1.6777713298797607, "learning_rate": 0.00017179987004548408, "loss": 0.1842, "step": 390 }, { "epoch": 7.017543859649122, "grad_norm": 1.8125029802322388, "learning_rate": 0.0001705003248862898, "loss": 0.207, "step": 400 }, { "epoch": 7.017543859649122, "eval_accuracy": 0.9435555555555556, "eval_f1": 0.8574635241301908, "eval_loss": 0.1374116837978363, "eval_precision": 0.8721461187214612, "eval_recall": 0.8432671081677704, "eval_runtime": 2.1099, "eval_samples_per_second": 106.642, "eval_steps_per_second": 13.745, "step": 400 }, { "epoch": 7.192982456140351, "grad_norm": 1.9295843839645386, "learning_rate": 0.00016920077972709552, "loss": 0.1647, "step": 410 }, { "epoch": 7.368421052631579, "grad_norm": 0.6302610039710999, "learning_rate": 0.00016790123456790124, "loss": 0.1653, "step": 420 }, { "epoch": 7.543859649122807, "grad_norm": 1.8599541187286377, "learning_rate": 0.00016660168940870696, "loss": 0.1582, "step": 430 }, { "epoch": 7.719298245614035, "grad_norm": 1.6737511157989502, "learning_rate": 0.00016530214424951268, "loss": 0.1835, "step": 440 }, { "epoch": 7.894736842105263, "grad_norm": 2.235042095184326, "learning_rate": 0.00016400259909031838, "loss": 0.1858, "step": 450 }, { "epoch": 8.070175438596491, "grad_norm": 1.0965207815170288, "learning_rate": 0.0001627030539311241, "loss": 0.1963, "step": 460 }, { "epoch": 8.24561403508772, "grad_norm": 1.6453418731689453, "learning_rate": 0.00016140350877192982, "loss": 0.1577, "step": 470 }, { "epoch": 8.421052631578947, "grad_norm": 1.0128905773162842, "learning_rate": 0.00016010396361273554, "loss": 0.1685, "step": 480 }, { "epoch": 8.596491228070175, "grad_norm": 1.2745692729949951, "learning_rate": 0.00015880441845354126, "loss": 0.1838, "step": 490 }, { "epoch": 8.771929824561404, "grad_norm": 2.066593885421753, "learning_rate": 0.00015750487329434698, "loss": 0.1709, "step": 500 }, { "epoch": 8.771929824561404, "eval_accuracy": 0.9431111111111111, "eval_f1": 0.8587196467991169, "eval_loss": 0.14432799816131592, "eval_precision": 0.8587196467991169, "eval_recall": 0.8587196467991169, "eval_runtime": 1.819, "eval_samples_per_second": 123.693, "eval_steps_per_second": 15.943, "step": 500 }, { "epoch": 8.947368421052632, "grad_norm": 0.8924937844276428, "learning_rate": 0.0001562053281351527, "loss": 0.1676, "step": 510 }, { "epoch": 9.12280701754386, "grad_norm": 2.818302869796753, "learning_rate": 0.00015490578297595842, "loss": 0.1861, "step": 520 }, { "epoch": 9.298245614035087, "grad_norm": 0.8714137673377991, "learning_rate": 0.00015360623781676414, "loss": 0.1642, "step": 530 }, { "epoch": 9.473684210526315, "grad_norm": 1.9320534467697144, "learning_rate": 0.00015230669265756984, "loss": 0.1744, "step": 540 }, { "epoch": 9.649122807017545, "grad_norm": 0.8549228310585022, "learning_rate": 0.00015100714749837556, "loss": 0.1556, "step": 550 }, { "epoch": 9.824561403508772, "grad_norm": 4.155418872833252, "learning_rate": 0.00014970760233918128, "loss": 0.153, "step": 560 }, { "epoch": 10.0, "grad_norm": 5.535355567932129, "learning_rate": 0.000148408057179987, "loss": 0.1773, "step": 570 }, { "epoch": 10.175438596491228, "grad_norm": 2.8799245357513428, "learning_rate": 0.00014710851202079272, "loss": 0.1464, "step": 580 }, { "epoch": 10.350877192982455, "grad_norm": 0.8742926716804504, "learning_rate": 0.00014580896686159844, "loss": 0.179, "step": 590 }, { "epoch": 10.526315789473685, "grad_norm": 1.1748207807540894, "learning_rate": 0.00014450942170240417, "loss": 0.1548, "step": 600 }, { "epoch": 10.526315789473685, "eval_accuracy": 0.9386666666666666, "eval_f1": 0.849015317286652, "eval_loss": 0.15721388161182404, "eval_precision": 0.841648590021692, "eval_recall": 0.8565121412803532, "eval_runtime": 1.7838, "eval_samples_per_second": 126.135, "eval_steps_per_second": 16.257, "step": 600 }, { "epoch": 10.701754385964913, "grad_norm": 0.5813456177711487, "learning_rate": 0.00014320987654320989, "loss": 0.1646, "step": 610 }, { "epoch": 10.87719298245614, "grad_norm": 1.4447375535964966, "learning_rate": 0.00014191033138401558, "loss": 0.1664, "step": 620 }, { "epoch": 11.052631578947368, "grad_norm": 1.4312775135040283, "learning_rate": 0.0001406107862248213, "loss": 0.17, "step": 630 }, { "epoch": 11.228070175438596, "grad_norm": 1.5336912870407104, "learning_rate": 0.00013931124106562702, "loss": 0.1663, "step": 640 }, { "epoch": 11.403508771929825, "grad_norm": 1.266555666923523, "learning_rate": 0.00013801169590643274, "loss": 0.1472, "step": 650 }, { "epoch": 11.578947368421053, "grad_norm": 1.1763581037521362, "learning_rate": 0.00013671215074723846, "loss": 0.1518, "step": 660 }, { "epoch": 11.75438596491228, "grad_norm": 2.520512580871582, "learning_rate": 0.00013541260558804419, "loss": 0.1565, "step": 670 }, { "epoch": 11.929824561403509, "grad_norm": 1.1412580013275146, "learning_rate": 0.0001341130604288499, "loss": 0.1801, "step": 680 }, { "epoch": 12.105263157894736, "grad_norm": 1.172298789024353, "learning_rate": 0.00013281351526965563, "loss": 0.1647, "step": 690 }, { "epoch": 12.280701754385966, "grad_norm": 0.8876182436943054, "learning_rate": 0.00013151397011046135, "loss": 0.1802, "step": 700 }, { "epoch": 12.280701754385966, "eval_accuracy": 0.9457777777777778, "eval_f1": 0.8656387665198237, "eval_loss": 0.1435655653476715, "eval_precision": 0.8637362637362638, "eval_recall": 0.8675496688741722, "eval_runtime": 2.5494, "eval_samples_per_second": 88.254, "eval_steps_per_second": 11.375, "step": 700 }, { "epoch": 12.456140350877194, "grad_norm": 1.5246055126190186, "learning_rate": 0.00013021442495126704, "loss": 0.1615, "step": 710 }, { "epoch": 12.631578947368421, "grad_norm": 1.5307178497314453, "learning_rate": 0.00012891487979207276, "loss": 0.1428, "step": 720 }, { "epoch": 12.807017543859649, "grad_norm": 1.6796847581863403, "learning_rate": 0.00012761533463287849, "loss": 0.1639, "step": 730 }, { "epoch": 12.982456140350877, "grad_norm": 0.943722128868103, "learning_rate": 0.0001263157894736842, "loss": 0.144, "step": 740 }, { "epoch": 13.157894736842104, "grad_norm": 1.7422752380371094, "learning_rate": 0.00012501624431448993, "loss": 0.1791, "step": 750 }, { "epoch": 13.333333333333334, "grad_norm": 0.629623532295227, "learning_rate": 0.00012371669915529565, "loss": 0.1647, "step": 760 }, { "epoch": 13.508771929824562, "grad_norm": 2.9177167415618896, "learning_rate": 0.00012241715399610137, "loss": 0.1679, "step": 770 }, { "epoch": 13.68421052631579, "grad_norm": 2.1729178428649902, "learning_rate": 0.00012111760883690708, "loss": 0.1497, "step": 780 }, { "epoch": 13.859649122807017, "grad_norm": 1.9510128498077393, "learning_rate": 0.0001198180636777128, "loss": 0.1539, "step": 790 }, { "epoch": 14.035087719298245, "grad_norm": 2.3144478797912598, "learning_rate": 0.00011851851851851852, "loss": 0.1455, "step": 800 }, { "epoch": 14.035087719298245, "eval_accuracy": 0.9466666666666667, "eval_f1": 0.8666666666666667, "eval_loss": 0.14417320489883423, "eval_precision": 0.87248322147651, "eval_recall": 0.8609271523178808, "eval_runtime": 1.8068, "eval_samples_per_second": 124.527, "eval_steps_per_second": 16.05, "step": 800 }, { "epoch": 14.210526315789474, "grad_norm": 1.6484512090682983, "learning_rate": 0.00011721897335932424, "loss": 0.1386, "step": 810 }, { "epoch": 14.385964912280702, "grad_norm": 1.467768907546997, "learning_rate": 0.00011591942820012995, "loss": 0.1698, "step": 820 }, { "epoch": 14.56140350877193, "grad_norm": 1.1191186904907227, "learning_rate": 0.00011461988304093567, "loss": 0.129, "step": 830 }, { "epoch": 14.736842105263158, "grad_norm": 1.83295738697052, "learning_rate": 0.00011332033788174139, "loss": 0.1774, "step": 840 }, { "epoch": 14.912280701754385, "grad_norm": 2.5583319664001465, "learning_rate": 0.00011202079272254711, "loss": 0.1507, "step": 850 }, { "epoch": 15.087719298245615, "grad_norm": 1.907935380935669, "learning_rate": 0.00011072124756335282, "loss": 0.1482, "step": 860 }, { "epoch": 15.263157894736842, "grad_norm": 3.6083953380584717, "learning_rate": 0.00010942170240415854, "loss": 0.1447, "step": 870 }, { "epoch": 15.43859649122807, "grad_norm": 2.4027905464172363, "learning_rate": 0.00010812215724496426, "loss": 0.1653, "step": 880 }, { "epoch": 15.614035087719298, "grad_norm": 1.2639557123184204, "learning_rate": 0.00010682261208576998, "loss": 0.1437, "step": 890 }, { "epoch": 15.789473684210526, "grad_norm": 0.6765735149383545, "learning_rate": 0.0001055230669265757, "loss": 0.1514, "step": 900 }, { "epoch": 15.789473684210526, "eval_accuracy": 0.9422222222222222, "eval_f1": 0.8571428571428571, "eval_loss": 0.1499728262424469, "eval_precision": 0.8533916849015317, "eval_recall": 0.8609271523178808, "eval_runtime": 1.7856, "eval_samples_per_second": 126.01, "eval_steps_per_second": 16.241, "step": 900 }, { "epoch": 15.964912280701755, "grad_norm": 0.6137629151344299, "learning_rate": 0.00010422352176738141, "loss": 0.1299, "step": 910 }, { "epoch": 16.140350877192983, "grad_norm": 2.0917060375213623, "learning_rate": 0.00010292397660818713, "loss": 0.1531, "step": 920 }, { "epoch": 16.31578947368421, "grad_norm": 3.555748462677002, "learning_rate": 0.00010162443144899285, "loss": 0.1238, "step": 930 }, { "epoch": 16.49122807017544, "grad_norm": 3.039712429046631, "learning_rate": 0.00010032488628979857, "loss": 0.1429, "step": 940 }, { "epoch": 16.666666666666668, "grad_norm": 3.8946707248687744, "learning_rate": 9.902534113060428e-05, "loss": 0.1597, "step": 950 }, { "epoch": 16.842105263157894, "grad_norm": 3.3118174076080322, "learning_rate": 9.772579597141e-05, "loss": 0.1645, "step": 960 }, { "epoch": 17.017543859649123, "grad_norm": 2.1213653087615967, "learning_rate": 9.642625081221572e-05, "loss": 0.165, "step": 970 }, { "epoch": 17.19298245614035, "grad_norm": 0.641942024230957, "learning_rate": 9.512670565302145e-05, "loss": 0.149, "step": 980 }, { "epoch": 17.36842105263158, "grad_norm": 1.734876036643982, "learning_rate": 9.382716049382717e-05, "loss": 0.1412, "step": 990 }, { "epoch": 17.54385964912281, "grad_norm": 2.6415109634399414, "learning_rate": 9.252761533463287e-05, "loss": 0.1368, "step": 1000 }, { "epoch": 17.54385964912281, "eval_accuracy": 0.9488888888888889, "eval_f1": 0.8717948717948718, "eval_loss": 0.13907207548618317, "eval_precision": 0.8806306306306306, "eval_recall": 0.8631346578366446, "eval_runtime": 2.6894, "eval_samples_per_second": 83.662, "eval_steps_per_second": 10.783, "step": 1000 }, { "epoch": 17.719298245614034, "grad_norm": 1.5012660026550293, "learning_rate": 9.12280701754386e-05, "loss": 0.1686, "step": 1010 }, { "epoch": 17.894736842105264, "grad_norm": 1.1262547969818115, "learning_rate": 8.992852501624432e-05, "loss": 0.1524, "step": 1020 }, { "epoch": 18.07017543859649, "grad_norm": 2.33294677734375, "learning_rate": 8.862897985705004e-05, "loss": 0.1644, "step": 1030 }, { "epoch": 18.24561403508772, "grad_norm": 3.1242616176605225, "learning_rate": 8.732943469785574e-05, "loss": 0.1383, "step": 1040 }, { "epoch": 18.42105263157895, "grad_norm": 0.7557860612869263, "learning_rate": 8.602988953866147e-05, "loss": 0.1418, "step": 1050 }, { "epoch": 18.596491228070175, "grad_norm": 2.3483850955963135, "learning_rate": 8.473034437946719e-05, "loss": 0.168, "step": 1060 }, { "epoch": 18.771929824561404, "grad_norm": 1.8713488578796387, "learning_rate": 8.343079922027291e-05, "loss": 0.1378, "step": 1070 }, { "epoch": 18.94736842105263, "grad_norm": 0.9523454308509827, "learning_rate": 8.213125406107862e-05, "loss": 0.1235, "step": 1080 }, { "epoch": 19.12280701754386, "grad_norm": 1.5565508604049683, "learning_rate": 8.083170890188434e-05, "loss": 0.1322, "step": 1090 }, { "epoch": 19.29824561403509, "grad_norm": 1.2913304567337036, "learning_rate": 7.953216374269006e-05, "loss": 0.1515, "step": 1100 }, { "epoch": 19.29824561403509, "eval_accuracy": 0.9475555555555556, "eval_f1": 0.8700440528634361, "eval_loss": 0.1369754672050476, "eval_precision": 0.8681318681318682, "eval_recall": 0.8719646799116998, "eval_runtime": 1.7736, "eval_samples_per_second": 126.858, "eval_steps_per_second": 16.351, "step": 1100 }, { "epoch": 19.473684210526315, "grad_norm": 1.4073901176452637, "learning_rate": 7.823261858349578e-05, "loss": 0.1481, "step": 1110 }, { "epoch": 19.649122807017545, "grad_norm": 0.748474657535553, "learning_rate": 7.69330734243015e-05, "loss": 0.1508, "step": 1120 }, { "epoch": 19.82456140350877, "grad_norm": 0.907399594783783, "learning_rate": 7.563352826510721e-05, "loss": 0.1264, "step": 1130 }, { "epoch": 20.0, "grad_norm": 4.148804664611816, "learning_rate": 7.433398310591293e-05, "loss": 0.1514, "step": 1140 }, { "epoch": 20.17543859649123, "grad_norm": 1.1374047994613647, "learning_rate": 7.303443794671865e-05, "loss": 0.1486, "step": 1150 }, { "epoch": 20.350877192982455, "grad_norm": 1.877455711364746, "learning_rate": 7.173489278752437e-05, "loss": 0.1576, "step": 1160 }, { "epoch": 20.526315789473685, "grad_norm": 2.704329013824463, "learning_rate": 7.043534762833008e-05, "loss": 0.1614, "step": 1170 }, { "epoch": 20.70175438596491, "grad_norm": 1.0661389827728271, "learning_rate": 6.91358024691358e-05, "loss": 0.1471, "step": 1180 }, { "epoch": 20.87719298245614, "grad_norm": 2.461351156234741, "learning_rate": 6.783625730994152e-05, "loss": 0.1577, "step": 1190 }, { "epoch": 21.05263157894737, "grad_norm": 3.2571732997894287, "learning_rate": 6.653671215074724e-05, "loss": 0.1372, "step": 1200 }, { "epoch": 21.05263157894737, "eval_accuracy": 0.9457777777777778, "eval_f1": 0.8644444444444445, "eval_loss": 0.13932354748249054, "eval_precision": 0.8702460850111857, "eval_recall": 0.8587196467991169, "eval_runtime": 1.7963, "eval_samples_per_second": 125.261, "eval_steps_per_second": 16.145, "step": 1200 }, { "epoch": 21.228070175438596, "grad_norm": 1.5632004737854004, "learning_rate": 6.523716699155295e-05, "loss": 0.1524, "step": 1210 }, { "epoch": 21.403508771929825, "grad_norm": 2.126483201980591, "learning_rate": 6.393762183235867e-05, "loss": 0.1394, "step": 1220 }, { "epoch": 21.57894736842105, "grad_norm": 1.7541165351867676, "learning_rate": 6.263807667316439e-05, "loss": 0.135, "step": 1230 }, { "epoch": 21.75438596491228, "grad_norm": 1.4574304819107056, "learning_rate": 6.133853151397011e-05, "loss": 0.1561, "step": 1240 }, { "epoch": 21.92982456140351, "grad_norm": 0.9761200547218323, "learning_rate": 6.003898635477583e-05, "loss": 0.1488, "step": 1250 }, { "epoch": 22.105263157894736, "grad_norm": 0.8040511012077332, "learning_rate": 5.873944119558155e-05, "loss": 0.1485, "step": 1260 }, { "epoch": 22.280701754385966, "grad_norm": 1.6228233575820923, "learning_rate": 5.743989603638726e-05, "loss": 0.1378, "step": 1270 }, { "epoch": 22.45614035087719, "grad_norm": 0.7623564600944519, "learning_rate": 5.6140350877192984e-05, "loss": 0.1289, "step": 1280 }, { "epoch": 22.63157894736842, "grad_norm": 1.6121069192886353, "learning_rate": 5.48408057179987e-05, "loss": 0.1362, "step": 1290 }, { "epoch": 22.80701754385965, "grad_norm": 3.3042190074920654, "learning_rate": 5.354126055880442e-05, "loss": 0.1397, "step": 1300 }, { "epoch": 22.80701754385965, "eval_accuracy": 0.9497777777777778, "eval_f1": 0.8745837957824639, "eval_loss": 0.13591831922531128, "eval_precision": 0.8794642857142857, "eval_recall": 0.869757174392936, "eval_runtime": 2.7356, "eval_samples_per_second": 82.25, "eval_steps_per_second": 10.601, "step": 1300 }, { "epoch": 22.982456140350877, "grad_norm": 3.5730159282684326, "learning_rate": 5.2241715399610133e-05, "loss": 0.1383, "step": 1310 }, { "epoch": 23.157894736842106, "grad_norm": 1.4815293550491333, "learning_rate": 5.0942170240415855e-05, "loss": 0.1379, "step": 1320 }, { "epoch": 23.333333333333332, "grad_norm": 1.9152294397354126, "learning_rate": 4.964262508122157e-05, "loss": 0.1349, "step": 1330 }, { "epoch": 23.50877192982456, "grad_norm": 2.3605239391326904, "learning_rate": 4.834307992202729e-05, "loss": 0.1366, "step": 1340 }, { "epoch": 23.68421052631579, "grad_norm": 2.1709718704223633, "learning_rate": 4.704353476283301e-05, "loss": 0.1339, "step": 1350 }, { "epoch": 23.859649122807017, "grad_norm": 2.6597700119018555, "learning_rate": 4.5743989603638725e-05, "loss": 0.1527, "step": 1360 }, { "epoch": 24.035087719298247, "grad_norm": 2.7181591987609863, "learning_rate": 4.4444444444444447e-05, "loss": 0.1388, "step": 1370 }, { "epoch": 24.210526315789473, "grad_norm": 2.1271519660949707, "learning_rate": 4.314489928525016e-05, "loss": 0.1177, "step": 1380 }, { "epoch": 24.385964912280702, "grad_norm": 0.7409679889678955, "learning_rate": 4.184535412605588e-05, "loss": 0.1245, "step": 1390 }, { "epoch": 24.56140350877193, "grad_norm": 1.3061821460723877, "learning_rate": 4.0545808966861596e-05, "loss": 0.1398, "step": 1400 }, { "epoch": 24.56140350877193, "eval_accuracy": 0.9488888888888889, "eval_f1": 0.8740416210295728, "eval_loss": 0.13523675501346588, "eval_precision": 0.8673913043478261, "eval_recall": 0.8807947019867549, "eval_runtime": 1.7895, "eval_samples_per_second": 125.734, "eval_steps_per_second": 16.206, "step": 1400 }, { "epoch": 24.736842105263158, "grad_norm": 1.6698417663574219, "learning_rate": 3.924626380766732e-05, "loss": 0.1586, "step": 1410 }, { "epoch": 24.912280701754387, "grad_norm": 1.0285630226135254, "learning_rate": 3.794671864847303e-05, "loss": 0.1303, "step": 1420 }, { "epoch": 25.087719298245613, "grad_norm": 0.6575556993484497, "learning_rate": 3.664717348927875e-05, "loss": 0.1781, "step": 1430 }, { "epoch": 25.263157894736842, "grad_norm": 2.197866678237915, "learning_rate": 3.534762833008447e-05, "loss": 0.1271, "step": 1440 }, { "epoch": 25.43859649122807, "grad_norm": 2.487614154815674, "learning_rate": 3.404808317089019e-05, "loss": 0.1551, "step": 1450 }, { "epoch": 25.614035087719298, "grad_norm": 1.7481322288513184, "learning_rate": 3.274853801169591e-05, "loss": 0.149, "step": 1460 }, { "epoch": 25.789473684210527, "grad_norm": 3.8675453662872314, "learning_rate": 3.1448992852501624e-05, "loss": 0.14, "step": 1470 }, { "epoch": 25.964912280701753, "grad_norm": 2.154574394226074, "learning_rate": 3.014944769330734e-05, "loss": 0.1298, "step": 1480 }, { "epoch": 26.140350877192983, "grad_norm": 1.4921404123306274, "learning_rate": 2.884990253411306e-05, "loss": 0.1224, "step": 1490 }, { "epoch": 26.31578947368421, "grad_norm": 5.314982891082764, "learning_rate": 2.7550357374918777e-05, "loss": 0.1276, "step": 1500 }, { "epoch": 26.31578947368421, "eval_accuracy": 0.9475555555555556, "eval_f1": 0.8700440528634361, "eval_loss": 0.13810041546821594, "eval_precision": 0.8681318681318682, "eval_recall": 0.8719646799116998, "eval_runtime": 1.7873, "eval_samples_per_second": 125.886, "eval_steps_per_second": 16.225, "step": 1500 }, { "epoch": 26.49122807017544, "grad_norm": 0.9850056171417236, "learning_rate": 2.6250812215724495e-05, "loss": 0.1556, "step": 1510 }, { "epoch": 26.666666666666668, "grad_norm": 1.0374152660369873, "learning_rate": 2.4951267056530216e-05, "loss": 0.1331, "step": 1520 }, { "epoch": 26.842105263157894, "grad_norm": 1.3075968027114868, "learning_rate": 2.3651721897335933e-05, "loss": 0.1168, "step": 1530 }, { "epoch": 27.017543859649123, "grad_norm": 3.286419153213501, "learning_rate": 2.235217673814165e-05, "loss": 0.1841, "step": 1540 }, { "epoch": 27.19298245614035, "grad_norm": 8.424769401550293, "learning_rate": 2.105263157894737e-05, "loss": 0.1412, "step": 1550 }, { "epoch": 27.36842105263158, "grad_norm": 1.265550971031189, "learning_rate": 1.9753086419753087e-05, "loss": 0.1428, "step": 1560 }, { "epoch": 27.54385964912281, "grad_norm": 1.605952501296997, "learning_rate": 1.8453541260558804e-05, "loss": 0.1249, "step": 1570 }, { "epoch": 27.719298245614034, "grad_norm": 0.9456580877304077, "learning_rate": 1.7153996101364522e-05, "loss": 0.1219, "step": 1580 }, { "epoch": 27.894736842105264, "grad_norm": 1.4221025705337524, "learning_rate": 1.585445094217024e-05, "loss": 0.1357, "step": 1590 }, { "epoch": 28.07017543859649, "grad_norm": 1.6127928495407104, "learning_rate": 1.4554905782975957e-05, "loss": 0.1519, "step": 1600 }, { "epoch": 28.07017543859649, "eval_accuracy": 0.9462222222222222, "eval_f1": 0.8665931642778391, "eval_loss": 0.1380426585674286, "eval_precision": 0.8656387665198237, "eval_recall": 0.8675496688741722, "eval_runtime": 2.7792, "eval_samples_per_second": 80.959, "eval_steps_per_second": 10.435, "step": 1600 }, { "epoch": 28.24561403508772, "grad_norm": 0.9390941858291626, "learning_rate": 1.3255360623781677e-05, "loss": 0.1366, "step": 1610 }, { "epoch": 28.42105263157895, "grad_norm": 0.6705310940742493, "learning_rate": 1.1955815464587395e-05, "loss": 0.1169, "step": 1620 }, { "epoch": 28.596491228070175, "grad_norm": 1.729995846748352, "learning_rate": 1.0656270305393112e-05, "loss": 0.1283, "step": 1630 }, { "epoch": 28.771929824561404, "grad_norm": 1.126247763633728, "learning_rate": 9.35672514619883e-06, "loss": 0.1282, "step": 1640 }, { "epoch": 28.94736842105263, "grad_norm": 1.3951548337936401, "learning_rate": 8.057179987004548e-06, "loss": 0.1335, "step": 1650 }, { "epoch": 29.12280701754386, "grad_norm": 1.6437804698944092, "learning_rate": 6.757634827810266e-06, "loss": 0.1339, "step": 1660 }, { "epoch": 29.29824561403509, "grad_norm": 1.941601037979126, "learning_rate": 5.458089668615984e-06, "loss": 0.1208, "step": 1670 }, { "epoch": 29.473684210526315, "grad_norm": 1.310896873474121, "learning_rate": 4.158544509421703e-06, "loss": 0.1373, "step": 1680 }, { "epoch": 29.649122807017545, "grad_norm": 1.1967781782150269, "learning_rate": 2.8589993502274203e-06, "loss": 0.1337, "step": 1690 }, { "epoch": 29.82456140350877, "grad_norm": 2.3011374473571777, "learning_rate": 1.5594541910331385e-06, "loss": 0.1479, "step": 1700 }, { "epoch": 29.82456140350877, "eval_accuracy": 0.9497777777777778, "eval_f1": 0.8754134509371555, "eval_loss": 0.13432377576828003, "eval_precision": 0.8744493392070485, "eval_recall": 0.8763796909492274, "eval_runtime": 1.7945, "eval_samples_per_second": 125.384, "eval_steps_per_second": 16.161, "step": 1700 }, { "epoch": 30.0, "grad_norm": 4.079135417938232, "learning_rate": 2.599090318388564e-07, "loss": 0.12, "step": 1710 }, { "epoch": 30.0, "step": 1710, "total_flos": 3.8465920659456e+16, "train_loss": 0.1578066376914755, "train_runtime": 318.4633, "train_samples_per_second": 84.782, "train_steps_per_second": 5.37 } ], "logging_steps": 10, "max_steps": 1710, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.8465920659456e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }