{ "best_metric": 0.9857904085257548, "best_model_checkpoint": "10-finetuned-ausSpiders2000/checkpoint-2535", "epoch": 9.97338065661047, "eval_steps": 500, "global_step": 2810, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.7793594306049825e-05, "loss": 2.4863, "step": 10 }, { "epoch": 0.07, "learning_rate": 3.558718861209965e-05, "loss": 2.1753, "step": 20 }, { "epoch": 0.11, "learning_rate": 5.338078291814947e-05, "loss": 1.6356, "step": 30 }, { "epoch": 0.14, "learning_rate": 7.11743772241993e-05, "loss": 0.9598, "step": 40 }, { "epoch": 0.18, "learning_rate": 8.896797153024912e-05, "loss": 0.5263, "step": 50 }, { "epoch": 0.21, "learning_rate": 0.00010676156583629893, "loss": 0.2692, "step": 60 }, { "epoch": 0.25, "learning_rate": 0.00012455516014234875, "loss": 0.233, "step": 70 }, { "epoch": 0.28, "learning_rate": 0.0001423487544483986, "loss": 0.1485, "step": 80 }, { "epoch": 0.32, "learning_rate": 0.00016014234875444842, "loss": 0.148, "step": 90 }, { "epoch": 0.35, "learning_rate": 0.00017793594306049823, "loss": 0.1282, "step": 100 }, { "epoch": 0.39, "learning_rate": 0.00019572953736654805, "loss": 0.1211, "step": 110 }, { "epoch": 0.43, "learning_rate": 0.00021352313167259787, "loss": 0.1153, "step": 120 }, { "epoch": 0.46, "learning_rate": 0.0002313167259786477, "loss": 0.1512, "step": 130 }, { "epoch": 0.5, "learning_rate": 0.0002491103202846975, "loss": 0.1465, "step": 140 }, { "epoch": 0.53, "learning_rate": 0.0002669039145907473, "loss": 0.1391, "step": 150 }, { "epoch": 0.57, "learning_rate": 0.0002846975088967972, "loss": 0.099, "step": 160 }, { "epoch": 0.6, "learning_rate": 0.000302491103202847, "loss": 0.1788, "step": 170 }, { "epoch": 0.64, "learning_rate": 0.00032028469750889683, "loss": 0.1824, "step": 180 }, { "epoch": 0.67, "learning_rate": 0.0003380782918149466, "loss": 0.1867, "step": 190 }, { "epoch": 0.71, "learning_rate": 0.00035587188612099647, "loss": 0.1699, "step": 200 }, { "epoch": 0.75, "learning_rate": 0.0003736654804270463, "loss": 0.1658, "step": 210 }, { "epoch": 0.78, "learning_rate": 0.0003914590747330961, "loss": 0.1465, "step": 220 }, { "epoch": 0.82, "learning_rate": 0.00040925266903914595, "loss": 0.173, "step": 230 }, { "epoch": 0.85, "learning_rate": 0.00042704626334519574, "loss": 0.2829, "step": 240 }, { "epoch": 0.89, "learning_rate": 0.0004448398576512456, "loss": 0.1879, "step": 250 }, { "epoch": 0.92, "learning_rate": 0.0004626334519572954, "loss": 0.2341, "step": 260 }, { "epoch": 0.96, "learning_rate": 0.0004804270462633452, "loss": 0.2539, "step": 270 }, { "epoch": 0.99, "learning_rate": 0.000498220640569395, "loss": 0.1997, "step": 280 }, { "epoch": 1.0, "eval_accuracy": 0.9280639431616341, "eval_f1": 0.9044835301236254, "eval_loss": 0.2264249473810196, "eval_precision": 0.9395261094135103, "eval_recall": 0.8868521981209923, "eval_runtime": 40.9312, "eval_samples_per_second": 27.51, "eval_steps_per_second": 3.445, "step": 281 }, { "epoch": 1.03, "learning_rate": 0.000498220640569395, "loss": 0.1887, "step": 290 }, { "epoch": 1.06, "learning_rate": 0.0004962435745353895, "loss": 0.227, "step": 300 }, { "epoch": 1.1, "learning_rate": 0.0004942665085013839, "loss": 0.1633, "step": 310 }, { "epoch": 1.14, "learning_rate": 0.0004922894424673784, "loss": 0.2611, "step": 320 }, { "epoch": 1.17, "learning_rate": 0.0004903123764333729, "loss": 0.1793, "step": 330 }, { "epoch": 1.21, "learning_rate": 0.0004883353103993673, "loss": 0.1217, "step": 340 }, { "epoch": 1.24, "learning_rate": 0.0004863582443653618, "loss": 0.2545, "step": 350 }, { "epoch": 1.28, "learning_rate": 0.00048438117833135623, "loss": 0.3059, "step": 360 }, { "epoch": 1.31, "learning_rate": 0.00048240411229735075, "loss": 0.2185, "step": 370 }, { "epoch": 1.35, "learning_rate": 0.0004804270462633452, "loss": 0.1968, "step": 380 }, { "epoch": 1.38, "learning_rate": 0.0004784499802293397, "loss": 0.2301, "step": 390 }, { "epoch": 1.42, "learning_rate": 0.0004764729141953341, "loss": 0.3476, "step": 400 }, { "epoch": 1.46, "learning_rate": 0.0004744958481613286, "loss": 0.2723, "step": 410 }, { "epoch": 1.49, "learning_rate": 0.0004725187821273231, "loss": 0.2253, "step": 420 }, { "epoch": 1.53, "learning_rate": 0.0004705417160933175, "loss": 0.1927, "step": 430 }, { "epoch": 1.56, "learning_rate": 0.00046856465005931197, "loss": 0.2396, "step": 440 }, { "epoch": 1.6, "learning_rate": 0.00046658758402530644, "loss": 0.2278, "step": 450 }, { "epoch": 1.63, "learning_rate": 0.00046461051799130096, "loss": 0.275, "step": 460 }, { "epoch": 1.67, "learning_rate": 0.0004626334519572954, "loss": 0.1884, "step": 470 }, { "epoch": 1.7, "learning_rate": 0.00046065638592328984, "loss": 0.2211, "step": 480 }, { "epoch": 1.74, "learning_rate": 0.0004586793198892843, "loss": 0.2202, "step": 490 }, { "epoch": 1.77, "learning_rate": 0.0004567022538552787, "loss": 0.2851, "step": 500 }, { "epoch": 1.81, "learning_rate": 0.00045472518782127324, "loss": 0.2128, "step": 510 }, { "epoch": 1.85, "learning_rate": 0.0004527481217872677, "loss": 0.2074, "step": 520 }, { "epoch": 1.88, "learning_rate": 0.0004507710557532622, "loss": 0.2324, "step": 530 }, { "epoch": 1.92, "learning_rate": 0.0004487939897192566, "loss": 0.1803, "step": 540 }, { "epoch": 1.95, "learning_rate": 0.0004468169236852511, "loss": 0.209, "step": 550 }, { "epoch": 1.99, "learning_rate": 0.0004448398576512456, "loss": 0.17, "step": 560 }, { "epoch": 2.0, "eval_accuracy": 0.9564831261101243, "eval_f1": 0.8380915867647825, "eval_loss": 0.13818518817424774, "eval_precision": 0.8539785317750744, "eval_recall": 0.8265891631354911, "eval_runtime": 39.8834, "eval_samples_per_second": 28.232, "eval_steps_per_second": 3.535, "step": 563 }, { "epoch": 2.02, "learning_rate": 0.00044286279161724005, "loss": 0.2291, "step": 570 }, { "epoch": 2.06, "learning_rate": 0.00044088572558323446, "loss": 0.1695, "step": 580 }, { "epoch": 2.09, "learning_rate": 0.00043890865954922893, "loss": 0.2086, "step": 590 }, { "epoch": 2.13, "learning_rate": 0.00043693159351522345, "loss": 0.1849, "step": 600 }, { "epoch": 2.17, "learning_rate": 0.00043495452748121787, "loss": 0.2532, "step": 610 }, { "epoch": 2.2, "learning_rate": 0.00043297746144721233, "loss": 0.2137, "step": 620 }, { "epoch": 2.24, "learning_rate": 0.0004310003954132068, "loss": 0.1389, "step": 630 }, { "epoch": 2.27, "learning_rate": 0.0004290233293792013, "loss": 0.2308, "step": 640 }, { "epoch": 2.31, "learning_rate": 0.00042704626334519574, "loss": 0.1713, "step": 650 }, { "epoch": 2.34, "learning_rate": 0.0004250691973111902, "loss": 0.1593, "step": 660 }, { "epoch": 2.38, "learning_rate": 0.0004230921312771847, "loss": 0.199, "step": 670 }, { "epoch": 2.41, "learning_rate": 0.0004211150652431791, "loss": 0.2582, "step": 680 }, { "epoch": 2.45, "learning_rate": 0.0004191379992091736, "loss": 0.182, "step": 690 }, { "epoch": 2.48, "learning_rate": 0.0004171609331751681, "loss": 0.2524, "step": 700 }, { "epoch": 2.52, "learning_rate": 0.00041518386714116254, "loss": 0.1836, "step": 710 }, { "epoch": 2.56, "learning_rate": 0.00041320680110715696, "loss": 0.1668, "step": 720 }, { "epoch": 2.59, "learning_rate": 0.0004112297350731515, "loss": 0.2326, "step": 730 }, { "epoch": 2.63, "learning_rate": 0.00040925266903914595, "loss": 0.1824, "step": 740 }, { "epoch": 2.66, "learning_rate": 0.00040727560300514036, "loss": 0.1817, "step": 750 }, { "epoch": 2.7, "learning_rate": 0.00040529853697113483, "loss": 0.1547, "step": 760 }, { "epoch": 2.73, "learning_rate": 0.0004033214709371293, "loss": 0.1652, "step": 770 }, { "epoch": 2.77, "learning_rate": 0.0004013444049031238, "loss": 0.2278, "step": 780 }, { "epoch": 2.8, "learning_rate": 0.00039936733886911823, "loss": 0.2369, "step": 790 }, { "epoch": 2.84, "learning_rate": 0.0003973902728351127, "loss": 0.1947, "step": 800 }, { "epoch": 2.87, "learning_rate": 0.00039541320680110717, "loss": 0.1326, "step": 810 }, { "epoch": 2.91, "learning_rate": 0.0003934361407671016, "loss": 0.1644, "step": 820 }, { "epoch": 2.95, "learning_rate": 0.0003914590747330961, "loss": 0.1888, "step": 830 }, { "epoch": 2.98, "learning_rate": 0.00038948200869909057, "loss": 0.21, "step": 840 }, { "epoch": 3.0, "eval_accuracy": 0.9582593250444049, "eval_f1": 0.9348978364444572, "eval_loss": 0.14037781953811646, "eval_precision": 0.9746987945146139, "eval_recall": 0.9063700900026892, "eval_runtime": 39.7299, "eval_samples_per_second": 28.341, "eval_steps_per_second": 3.549, "step": 845 }, { "epoch": 3.02, "learning_rate": 0.00038750494266508504, "loss": 0.129, "step": 850 }, { "epoch": 3.05, "learning_rate": 0.00038552787663107945, "loss": 0.1675, "step": 860 }, { "epoch": 3.09, "learning_rate": 0.000383550810597074, "loss": 0.1136, "step": 870 }, { "epoch": 3.12, "learning_rate": 0.00038157374456306844, "loss": 0.1937, "step": 880 }, { "epoch": 3.16, "learning_rate": 0.00037959667852906285, "loss": 0.1219, "step": 890 }, { "epoch": 3.19, "learning_rate": 0.0003776196124950573, "loss": 0.1394, "step": 900 }, { "epoch": 3.23, "learning_rate": 0.0003756425464610518, "loss": 0.1505, "step": 910 }, { "epoch": 3.27, "learning_rate": 0.0003736654804270463, "loss": 0.1487, "step": 920 }, { "epoch": 3.3, "learning_rate": 0.0003716884143930407, "loss": 0.1228, "step": 930 }, { "epoch": 3.34, "learning_rate": 0.0003697113483590352, "loss": 0.185, "step": 940 }, { "epoch": 3.37, "learning_rate": 0.00036773428232502966, "loss": 0.1313, "step": 950 }, { "epoch": 3.41, "learning_rate": 0.00036575721629102413, "loss": 0.1977, "step": 960 }, { "epoch": 3.44, "learning_rate": 0.0003637801502570186, "loss": 0.2188, "step": 970 }, { "epoch": 3.48, "learning_rate": 0.00036180308422301306, "loss": 0.1717, "step": 980 }, { "epoch": 3.51, "learning_rate": 0.00035982601818900753, "loss": 0.1357, "step": 990 }, { "epoch": 3.55, "learning_rate": 0.00035784895215500194, "loss": 0.1386, "step": 1000 }, { "epoch": 3.58, "learning_rate": 0.00035587188612099647, "loss": 0.169, "step": 1010 }, { "epoch": 3.62, "learning_rate": 0.00035389482008699093, "loss": 0.164, "step": 1020 }, { "epoch": 3.66, "learning_rate": 0.00035191775405298535, "loss": 0.1334, "step": 1030 }, { "epoch": 3.69, "learning_rate": 0.0003499406880189798, "loss": 0.1634, "step": 1040 }, { "epoch": 3.73, "learning_rate": 0.00034796362198497434, "loss": 0.1614, "step": 1050 }, { "epoch": 3.76, "learning_rate": 0.0003459865559509688, "loss": 0.1819, "step": 1060 }, { "epoch": 3.8, "learning_rate": 0.0003440094899169632, "loss": 0.1796, "step": 1070 }, { "epoch": 3.83, "learning_rate": 0.0003420324238829577, "loss": 0.1341, "step": 1080 }, { "epoch": 3.87, "learning_rate": 0.00034005535784895215, "loss": 0.127, "step": 1090 }, { "epoch": 3.9, "learning_rate": 0.0003380782918149466, "loss": 0.1709, "step": 1100 }, { "epoch": 3.94, "learning_rate": 0.0003361012257809411, "loss": 0.2003, "step": 1110 }, { "epoch": 3.98, "learning_rate": 0.00033412415974693556, "loss": 0.1976, "step": 1120 }, { "epoch": 4.0, "eval_accuracy": 0.9689165186500888, "eval_f1": 0.912801315494264, "eval_loss": 0.09873761981725693, "eval_precision": 0.9716016779252258, "eval_recall": 0.8917025082468766, "eval_runtime": 39.8355, "eval_samples_per_second": 28.266, "eval_steps_per_second": 3.54, "step": 1127 }, { "epoch": 4.01, "learning_rate": 0.00033214709371293, "loss": 0.1981, "step": 1130 }, { "epoch": 4.05, "learning_rate": 0.00033017002767892444, "loss": 0.1504, "step": 1140 }, { "epoch": 4.08, "learning_rate": 0.00032819296164491896, "loss": 0.1655, "step": 1150 }, { "epoch": 4.12, "learning_rate": 0.00032621589561091343, "loss": 0.0908, "step": 1160 }, { "epoch": 4.15, "learning_rate": 0.00032423882957690784, "loss": 0.1043, "step": 1170 }, { "epoch": 4.19, "learning_rate": 0.0003222617635429023, "loss": 0.1908, "step": 1180 }, { "epoch": 4.22, "learning_rate": 0.00032028469750889683, "loss": 0.1202, "step": 1190 }, { "epoch": 4.26, "learning_rate": 0.0003183076314748913, "loss": 0.1597, "step": 1200 }, { "epoch": 4.29, "learning_rate": 0.0003163305654408857, "loss": 0.1318, "step": 1210 }, { "epoch": 4.33, "learning_rate": 0.0003143534994068802, "loss": 0.1674, "step": 1220 }, { "epoch": 4.37, "learning_rate": 0.00031237643337287465, "loss": 0.1218, "step": 1230 }, { "epoch": 4.4, "learning_rate": 0.00031039936733886917, "loss": 0.156, "step": 1240 }, { "epoch": 4.44, "learning_rate": 0.0003084223013048636, "loss": 0.1314, "step": 1250 }, { "epoch": 4.47, "learning_rate": 0.00030644523527085805, "loss": 0.1817, "step": 1260 }, { "epoch": 4.51, "learning_rate": 0.0003044681692368525, "loss": 0.187, "step": 1270 }, { "epoch": 4.54, "learning_rate": 0.000302491103202847, "loss": 0.1318, "step": 1280 }, { "epoch": 4.58, "learning_rate": 0.00030051403716884145, "loss": 0.1806, "step": 1290 }, { "epoch": 4.61, "learning_rate": 0.0002985369711348359, "loss": 0.1614, "step": 1300 }, { "epoch": 4.65, "learning_rate": 0.0002965599051008304, "loss": 0.1221, "step": 1310 }, { "epoch": 4.69, "learning_rate": 0.0002945828390668248, "loss": 0.1391, "step": 1320 }, { "epoch": 4.72, "learning_rate": 0.0002926057730328193, "loss": 0.1275, "step": 1330 }, { "epoch": 4.76, "learning_rate": 0.0002906287069988138, "loss": 0.1265, "step": 1340 }, { "epoch": 4.79, "learning_rate": 0.0002886516409648082, "loss": 0.1637, "step": 1350 }, { "epoch": 4.83, "learning_rate": 0.0002866745749308027, "loss": 0.1833, "step": 1360 }, { "epoch": 4.86, "learning_rate": 0.0002846975088967972, "loss": 0.1235, "step": 1370 }, { "epoch": 4.9, "learning_rate": 0.00028272044286279166, "loss": 0.0997, "step": 1380 }, { "epoch": 4.93, "learning_rate": 0.0002807433768287861, "loss": 0.0879, "step": 1390 }, { "epoch": 4.97, "learning_rate": 0.00027876631079478054, "loss": 0.178, "step": 1400 }, { "epoch": 5.0, "eval_accuracy": 0.9635879218472468, "eval_f1": 0.9471608472683106, "eval_loss": 0.12194248288869858, "eval_precision": 0.9393499332363083, "eval_recall": 0.9600466296790979, "eval_runtime": 39.8939, "eval_samples_per_second": 28.225, "eval_steps_per_second": 3.534, "step": 1408 }, { "epoch": 5.0, "learning_rate": 0.000276789244760775, "loss": 0.0922, "step": 1410 }, { "epoch": 5.04, "learning_rate": 0.0002748121787267695, "loss": 0.0684, "step": 1420 }, { "epoch": 5.08, "learning_rate": 0.00027283511269276395, "loss": 0.1164, "step": 1430 }, { "epoch": 5.11, "learning_rate": 0.0002708580466587584, "loss": 0.0982, "step": 1440 }, { "epoch": 5.15, "learning_rate": 0.0002688809806247529, "loss": 0.1041, "step": 1450 }, { "epoch": 5.18, "learning_rate": 0.0002669039145907473, "loss": 0.1143, "step": 1460 }, { "epoch": 5.22, "learning_rate": 0.0002649268485567418, "loss": 0.1927, "step": 1470 }, { "epoch": 5.25, "learning_rate": 0.0002629497825227363, "loss": 0.1207, "step": 1480 }, { "epoch": 5.29, "learning_rate": 0.0002609727164887307, "loss": 0.1076, "step": 1490 }, { "epoch": 5.32, "learning_rate": 0.00025899565045472517, "loss": 0.1068, "step": 1500 }, { "epoch": 5.36, "learning_rate": 0.0002570185844207197, "loss": 0.1254, "step": 1510 }, { "epoch": 5.39, "learning_rate": 0.00025504151838671416, "loss": 0.0679, "step": 1520 }, { "epoch": 5.43, "learning_rate": 0.00025306445235270857, "loss": 0.12, "step": 1530 }, { "epoch": 5.47, "learning_rate": 0.00025108738631870304, "loss": 0.1157, "step": 1540 }, { "epoch": 5.5, "learning_rate": 0.0002491103202846975, "loss": 0.1034, "step": 1550 }, { "epoch": 5.54, "learning_rate": 0.00024713325425069197, "loss": 0.1317, "step": 1560 }, { "epoch": 5.57, "learning_rate": 0.00024515618821668644, "loss": 0.0942, "step": 1570 }, { "epoch": 5.61, "learning_rate": 0.0002431791221826809, "loss": 0.1381, "step": 1580 }, { "epoch": 5.64, "learning_rate": 0.00024120205614867538, "loss": 0.1109, "step": 1590 }, { "epoch": 5.68, "learning_rate": 0.00023922499011466984, "loss": 0.1132, "step": 1600 }, { "epoch": 5.71, "learning_rate": 0.0002372479240806643, "loss": 0.0736, "step": 1610 }, { "epoch": 5.75, "learning_rate": 0.00023527085804665875, "loss": 0.1418, "step": 1620 }, { "epoch": 5.79, "learning_rate": 0.00023329379201265322, "loss": 0.1318, "step": 1630 }, { "epoch": 5.82, "learning_rate": 0.0002313167259786477, "loss": 0.1313, "step": 1640 }, { "epoch": 5.86, "learning_rate": 0.00022933965994464215, "loss": 0.1749, "step": 1650 }, { "epoch": 5.89, "learning_rate": 0.00022736259391063662, "loss": 0.1526, "step": 1660 }, { "epoch": 5.93, "learning_rate": 0.0002253855278766311, "loss": 0.0911, "step": 1670 }, { "epoch": 5.96, "learning_rate": 0.00022340846184262556, "loss": 0.1377, "step": 1680 }, { "epoch": 6.0, "learning_rate": 0.00022143139580862003, "loss": 0.0659, "step": 1690 }, { "epoch": 6.0, "eval_accuracy": 0.9813499111900533, "eval_f1": 0.9807178013915328, "eval_loss": 0.08037910610437393, "eval_precision": 0.9815439633215112, "eval_recall": 0.9801148444477591, "eval_runtime": 39.9217, "eval_samples_per_second": 28.205, "eval_steps_per_second": 3.532, "step": 1690 }, { "epoch": 6.03, "learning_rate": 0.00021945432977461447, "loss": 0.1303, "step": 1700 }, { "epoch": 6.07, "learning_rate": 0.00021747726374060893, "loss": 0.0513, "step": 1710 }, { "epoch": 6.1, "learning_rate": 0.0002155001977066034, "loss": 0.0856, "step": 1720 }, { "epoch": 6.14, "learning_rate": 0.00021352313167259787, "loss": 0.1485, "step": 1730 }, { "epoch": 6.18, "learning_rate": 0.00021154606563859234, "loss": 0.118, "step": 1740 }, { "epoch": 6.21, "learning_rate": 0.0002095689996045868, "loss": 0.0684, "step": 1750 }, { "epoch": 6.25, "learning_rate": 0.00020759193357058127, "loss": 0.1394, "step": 1760 }, { "epoch": 6.28, "learning_rate": 0.00020561486753657574, "loss": 0.1556, "step": 1770 }, { "epoch": 6.32, "learning_rate": 0.00020363780150257018, "loss": 0.1246, "step": 1780 }, { "epoch": 6.35, "learning_rate": 0.00020166073546856465, "loss": 0.1257, "step": 1790 }, { "epoch": 6.39, "learning_rate": 0.00019968366943455912, "loss": 0.17, "step": 1800 }, { "epoch": 6.42, "learning_rate": 0.00019770660340055358, "loss": 0.0932, "step": 1810 }, { "epoch": 6.46, "learning_rate": 0.00019572953736654805, "loss": 0.1288, "step": 1820 }, { "epoch": 6.5, "learning_rate": 0.00019375247133254252, "loss": 0.0677, "step": 1830 }, { "epoch": 6.53, "learning_rate": 0.000191775405298537, "loss": 0.1177, "step": 1840 }, { "epoch": 6.57, "learning_rate": 0.00018979833926453143, "loss": 0.1093, "step": 1850 }, { "epoch": 6.6, "learning_rate": 0.0001878212732305259, "loss": 0.1438, "step": 1860 }, { "epoch": 6.64, "learning_rate": 0.00018584420719652036, "loss": 0.0854, "step": 1870 }, { "epoch": 6.67, "learning_rate": 0.00018386714116251483, "loss": 0.0865, "step": 1880 }, { "epoch": 6.71, "learning_rate": 0.0001818900751285093, "loss": 0.1254, "step": 1890 }, { "epoch": 6.74, "learning_rate": 0.00017991300909450377, "loss": 0.0999, "step": 1900 }, { "epoch": 6.78, "learning_rate": 0.00017793594306049823, "loss": 0.1072, "step": 1910 }, { "epoch": 6.81, "learning_rate": 0.00017595887702649267, "loss": 0.1191, "step": 1920 }, { "epoch": 6.85, "learning_rate": 0.00017398181099248717, "loss": 0.1147, "step": 1930 }, { "epoch": 6.89, "learning_rate": 0.0001720047449584816, "loss": 0.1028, "step": 1940 }, { "epoch": 6.92, "learning_rate": 0.00017002767892447608, "loss": 0.0695, "step": 1950 }, { "epoch": 6.96, "learning_rate": 0.00016805061289047054, "loss": 0.0913, "step": 1960 }, { "epoch": 6.99, "learning_rate": 0.000166073546856465, "loss": 0.0917, "step": 1970 }, { "epoch": 7.0, "eval_accuracy": 0.9733570159857904, "eval_f1": 0.9715717407405214, "eval_loss": 0.10624586045742035, "eval_precision": 0.9764811708120374, "eval_recall": 0.9675866233506976, "eval_runtime": 39.8459, "eval_samples_per_second": 28.259, "eval_steps_per_second": 3.539, "step": 1972 }, { "epoch": 7.03, "learning_rate": 0.00016409648082245948, "loss": 0.1021, "step": 1980 }, { "epoch": 7.06, "learning_rate": 0.00016211941478845392, "loss": 0.0706, "step": 1990 }, { "epoch": 7.1, "learning_rate": 0.00016014234875444842, "loss": 0.0677, "step": 2000 }, { "epoch": 7.13, "learning_rate": 0.00015816528272044286, "loss": 0.0857, "step": 2010 }, { "epoch": 7.17, "learning_rate": 0.00015618821668643732, "loss": 0.0865, "step": 2020 }, { "epoch": 7.2, "learning_rate": 0.0001542111506524318, "loss": 0.0641, "step": 2030 }, { "epoch": 7.24, "learning_rate": 0.00015223408461842626, "loss": 0.0446, "step": 2040 }, { "epoch": 7.28, "learning_rate": 0.00015025701858442073, "loss": 0.053, "step": 2050 }, { "epoch": 7.31, "learning_rate": 0.0001482799525504152, "loss": 0.087, "step": 2060 }, { "epoch": 7.35, "learning_rate": 0.00014630288651640966, "loss": 0.0873, "step": 2070 }, { "epoch": 7.38, "learning_rate": 0.0001443258204824041, "loss": 0.099, "step": 2080 }, { "epoch": 7.42, "learning_rate": 0.0001423487544483986, "loss": 0.1161, "step": 2090 }, { "epoch": 7.45, "learning_rate": 0.00014037168841439304, "loss": 0.0951, "step": 2100 }, { "epoch": 7.49, "learning_rate": 0.0001383946223803875, "loss": 0.0605, "step": 2110 }, { "epoch": 7.52, "learning_rate": 0.00013641755634638197, "loss": 0.0857, "step": 2120 }, { "epoch": 7.56, "learning_rate": 0.00013444049031237644, "loss": 0.1027, "step": 2130 }, { "epoch": 7.6, "learning_rate": 0.0001324634242783709, "loss": 0.1222, "step": 2140 }, { "epoch": 7.63, "learning_rate": 0.00013048635824436535, "loss": 0.1055, "step": 2150 }, { "epoch": 7.67, "learning_rate": 0.00012850929221035984, "loss": 0.1326, "step": 2160 }, { "epoch": 7.7, "learning_rate": 0.00012653222617635428, "loss": 0.0953, "step": 2170 }, { "epoch": 7.74, "learning_rate": 0.00012455516014234875, "loss": 0.0672, "step": 2180 }, { "epoch": 7.77, "learning_rate": 0.00012257809410834322, "loss": 0.0827, "step": 2190 }, { "epoch": 7.81, "learning_rate": 0.00012060102807433769, "loss": 0.0651, "step": 2200 }, { "epoch": 7.84, "learning_rate": 0.00011862396204033216, "loss": 0.0662, "step": 2210 }, { "epoch": 7.88, "learning_rate": 0.00011664689600632661, "loss": 0.1431, "step": 2220 }, { "epoch": 7.91, "learning_rate": 0.00011466982997232108, "loss": 0.0315, "step": 2230 }, { "epoch": 7.95, "learning_rate": 0.00011269276393831554, "loss": 0.0996, "step": 2240 }, { "epoch": 7.99, "learning_rate": 0.00011071569790431001, "loss": 0.108, "step": 2250 }, { "epoch": 8.0, "eval_accuracy": 0.9849023090586145, "eval_f1": 0.9828334388632164, "eval_loss": 0.05676717311143875, "eval_precision": 0.9868491250375566, "eval_recall": 0.9794172540958566, "eval_runtime": 39.7132, "eval_samples_per_second": 28.353, "eval_steps_per_second": 3.55, "step": 2254 }, { "epoch": 8.02, "learning_rate": 0.00010873863187030447, "loss": 0.0307, "step": 2260 }, { "epoch": 8.06, "learning_rate": 0.00010676156583629893, "loss": 0.0847, "step": 2270 }, { "epoch": 8.09, "learning_rate": 0.0001047844998022934, "loss": 0.1177, "step": 2280 }, { "epoch": 8.13, "learning_rate": 0.00010280743376828787, "loss": 0.0689, "step": 2290 }, { "epoch": 8.16, "learning_rate": 0.00010083036773428232, "loss": 0.0873, "step": 2300 }, { "epoch": 8.2, "learning_rate": 9.885330170027679e-05, "loss": 0.1209, "step": 2310 }, { "epoch": 8.23, "learning_rate": 9.687623566627126e-05, "loss": 0.0488, "step": 2320 }, { "epoch": 8.27, "learning_rate": 9.489916963226571e-05, "loss": 0.0558, "step": 2330 }, { "epoch": 8.31, "learning_rate": 9.292210359826018e-05, "loss": 0.067, "step": 2340 }, { "epoch": 8.34, "learning_rate": 9.094503756425465e-05, "loss": 0.13, "step": 2350 }, { "epoch": 8.38, "learning_rate": 8.896797153024912e-05, "loss": 0.0743, "step": 2360 }, { "epoch": 8.41, "learning_rate": 8.699090549624358e-05, "loss": 0.0797, "step": 2370 }, { "epoch": 8.45, "learning_rate": 8.501383946223804e-05, "loss": 0.0913, "step": 2380 }, { "epoch": 8.48, "learning_rate": 8.30367734282325e-05, "loss": 0.0772, "step": 2390 }, { "epoch": 8.52, "learning_rate": 8.105970739422696e-05, "loss": 0.072, "step": 2400 }, { "epoch": 8.55, "learning_rate": 7.908264136022143e-05, "loss": 0.0566, "step": 2410 }, { "epoch": 8.59, "learning_rate": 7.71055753262159e-05, "loss": 0.1198, "step": 2420 }, { "epoch": 8.62, "learning_rate": 7.512850929221036e-05, "loss": 0.0647, "step": 2430 }, { "epoch": 8.66, "learning_rate": 7.315144325820483e-05, "loss": 0.1079, "step": 2440 }, { "epoch": 8.7, "learning_rate": 7.11743772241993e-05, "loss": 0.0792, "step": 2450 }, { "epoch": 8.73, "learning_rate": 6.919731119019375e-05, "loss": 0.0712, "step": 2460 }, { "epoch": 8.77, "learning_rate": 6.722024515618822e-05, "loss": 0.0707, "step": 2470 }, { "epoch": 8.8, "learning_rate": 6.524317912218267e-05, "loss": 0.0571, "step": 2480 }, { "epoch": 8.84, "learning_rate": 6.326611308817714e-05, "loss": 0.0802, "step": 2490 }, { "epoch": 8.87, "learning_rate": 6.128904705417161e-05, "loss": 0.0533, "step": 2500 }, { "epoch": 8.91, "learning_rate": 5.931198102016608e-05, "loss": 0.078, "step": 2510 }, { "epoch": 8.94, "learning_rate": 5.733491498616054e-05, "loss": 0.0795, "step": 2520 }, { "epoch": 8.98, "learning_rate": 5.5357848952155006e-05, "loss": 0.1151, "step": 2530 }, { "epoch": 9.0, "eval_accuracy": 0.9857904085257548, "eval_f1": 0.9868558448721283, "eval_loss": 0.050765056163072586, "eval_precision": 0.987576986984624, "eval_recall": 0.9863465915836483, "eval_runtime": 39.9806, "eval_samples_per_second": 28.164, "eval_steps_per_second": 3.527, "step": 2535 }, { "epoch": 9.02, "learning_rate": 5.338078291814947e-05, "loss": 0.0986, "step": 2540 }, { "epoch": 9.05, "learning_rate": 5.1403716884143935e-05, "loss": 0.0261, "step": 2550 }, { "epoch": 9.09, "learning_rate": 4.9426650850138396e-05, "loss": 0.0691, "step": 2560 }, { "epoch": 9.12, "learning_rate": 4.744958481613286e-05, "loss": 0.0361, "step": 2570 }, { "epoch": 9.16, "learning_rate": 4.5472518782127324e-05, "loss": 0.0829, "step": 2580 }, { "epoch": 9.19, "learning_rate": 4.349545274812179e-05, "loss": 0.0701, "step": 2590 }, { "epoch": 9.23, "learning_rate": 4.151838671411625e-05, "loss": 0.0608, "step": 2600 }, { "epoch": 9.26, "learning_rate": 3.9541320680110714e-05, "loss": 0.056, "step": 2610 }, { "epoch": 9.3, "learning_rate": 3.756425464610518e-05, "loss": 0.0595, "step": 2620 }, { "epoch": 9.33, "learning_rate": 3.558718861209965e-05, "loss": 0.0891, "step": 2630 }, { "epoch": 9.37, "learning_rate": 3.361012257809411e-05, "loss": 0.0716, "step": 2640 }, { "epoch": 9.41, "learning_rate": 3.163305654408857e-05, "loss": 0.0674, "step": 2650 }, { "epoch": 9.44, "learning_rate": 2.965599051008304e-05, "loss": 0.0568, "step": 2660 }, { "epoch": 9.48, "learning_rate": 2.7678924476077503e-05, "loss": 0.0531, "step": 2670 }, { "epoch": 9.51, "learning_rate": 2.5701858442071967e-05, "loss": 0.0749, "step": 2680 }, { "epoch": 9.55, "learning_rate": 2.372479240806643e-05, "loss": 0.09, "step": 2690 }, { "epoch": 9.58, "learning_rate": 2.1747726374060896e-05, "loss": 0.0794, "step": 2700 }, { "epoch": 9.62, "learning_rate": 1.9770660340055357e-05, "loss": 0.0452, "step": 2710 }, { "epoch": 9.65, "learning_rate": 1.7793594306049825e-05, "loss": 0.0822, "step": 2720 }, { "epoch": 9.69, "learning_rate": 1.5816528272044286e-05, "loss": 0.0461, "step": 2730 }, { "epoch": 9.72, "learning_rate": 1.3839462238038752e-05, "loss": 0.043, "step": 2740 }, { "epoch": 9.76, "learning_rate": 1.1862396204033214e-05, "loss": 0.1089, "step": 2750 }, { "epoch": 9.8, "learning_rate": 9.885330170027678e-06, "loss": 0.0963, "step": 2760 }, { "epoch": 9.83, "learning_rate": 7.908264136022143e-06, "loss": 0.0683, "step": 2770 }, { "epoch": 9.87, "learning_rate": 5.931198102016607e-06, "loss": 0.047, "step": 2780 }, { "epoch": 9.9, "learning_rate": 3.954132068011071e-06, "loss": 0.0792, "step": 2790 }, { "epoch": 9.94, "learning_rate": 1.9770660340055357e-06, "loss": 0.0441, "step": 2800 }, { "epoch": 9.97, "learning_rate": 0.0, "loss": 0.049, "step": 2810 }, { "epoch": 9.97, "eval_accuracy": 0.9857904085257548, "eval_f1": 0.9877505171364073, "eval_loss": 0.050837442278862, "eval_precision": 0.9891672844560683, "eval_recall": 0.9865008922572412, "eval_runtime": 39.8176, "eval_samples_per_second": 28.279, "eval_steps_per_second": 3.541, "step": 2810 }, { "epoch": 9.97, "step": 2810, "total_flos": 2.0918954997191803e+19, "train_loss": 0.16025869953462663, "train_runtime": 8523.7997, "train_samples_per_second": 10.57, "train_steps_per_second": 0.33 } ], "logging_steps": 10, "max_steps": 2810, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2.0918954997191803e+19, "trial_name": null, "trial_params": null }