{ "best_metric": 0.14370010793209076, "best_model_checkpoint": "autotrain-pdluc-hzy96/checkpoint-3684", "epoch": 3.0, "eval_steps": 500, "global_step": 3684, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02035830618892508, "grad_norm": 17.59064483642578, "learning_rate": 3.3875338753387534e-06, "loss": 3.2447, "step": 25 }, { "epoch": 0.04071661237785016, "grad_norm": 11.315908432006836, "learning_rate": 6.775067750677507e-06, "loss": 3.0589, "step": 50 }, { "epoch": 0.061074918566775244, "grad_norm": 12.00735092163086, "learning_rate": 1.016260162601626e-05, "loss": 2.855, "step": 75 }, { "epoch": 0.08143322475570032, "grad_norm": 12.152534484863281, "learning_rate": 1.3550135501355014e-05, "loss": 2.435, "step": 100 }, { "epoch": 0.10179153094462541, "grad_norm": 7.038722515106201, "learning_rate": 1.6937669376693767e-05, "loss": 2.0761, "step": 125 }, { "epoch": 0.12214983713355049, "grad_norm": 7.8571672439575195, "learning_rate": 2.032520325203252e-05, "loss": 1.662, "step": 150 }, { "epoch": 0.14250814332247558, "grad_norm": 6.713129043579102, "learning_rate": 2.3712737127371277e-05, "loss": 1.4013, "step": 175 }, { "epoch": 0.16286644951140064, "grad_norm": 8.896265983581543, "learning_rate": 2.7100271002710027e-05, "loss": 1.0493, "step": 200 }, { "epoch": 0.18322475570032573, "grad_norm": 5.255068778991699, "learning_rate": 3.048780487804878e-05, "loss": 0.7443, "step": 225 }, { "epoch": 0.20358306188925082, "grad_norm": 3.356752395629883, "learning_rate": 3.3875338753387534e-05, "loss": 0.8289, "step": 250 }, { "epoch": 0.22394136807817588, "grad_norm": 9.631097793579102, "learning_rate": 3.726287262872629e-05, "loss": 0.8404, "step": 275 }, { "epoch": 0.24429967426710097, "grad_norm": 11.773497581481934, "learning_rate": 4.065040650406504e-05, "loss": 0.7375, "step": 300 }, { "epoch": 0.26465798045602607, "grad_norm": 5.957780361175537, "learning_rate": 4.4037940379403794e-05, "loss": 0.6854, "step": 325 }, { "epoch": 0.28501628664495116, "grad_norm": 2.0840659141540527, "learning_rate": 4.7425474254742554e-05, "loss": 0.5385, "step": 350 }, { "epoch": 0.30537459283387625, "grad_norm": 3.9480788707733154, "learning_rate": 4.990950226244344e-05, "loss": 0.6056, "step": 375 }, { "epoch": 0.3257328990228013, "grad_norm": 5.269024848937988, "learning_rate": 4.953242835595777e-05, "loss": 0.5275, "step": 400 }, { "epoch": 0.34609120521172637, "grad_norm": 13.319404602050781, "learning_rate": 4.91553544494721e-05, "loss": 0.3928, "step": 425 }, { "epoch": 0.36644951140065146, "grad_norm": 2.4886913299560547, "learning_rate": 4.8778280542986424e-05, "loss": 0.3776, "step": 450 }, { "epoch": 0.38680781758957655, "grad_norm": 1.2860759496688843, "learning_rate": 4.8401206636500757e-05, "loss": 0.2673, "step": 475 }, { "epoch": 0.40716612377850164, "grad_norm": 1.7032244205474854, "learning_rate": 4.802413273001509e-05, "loss": 0.4476, "step": 500 }, { "epoch": 0.42752442996742673, "grad_norm": 5.159543514251709, "learning_rate": 4.7647058823529414e-05, "loss": 0.3505, "step": 525 }, { "epoch": 0.44788273615635177, "grad_norm": 0.8131327629089355, "learning_rate": 4.726998491704374e-05, "loss": 0.3492, "step": 550 }, { "epoch": 0.46824104234527686, "grad_norm": 12.537750244140625, "learning_rate": 4.689291101055807e-05, "loss": 0.343, "step": 575 }, { "epoch": 0.48859934853420195, "grad_norm": 3.9186041355133057, "learning_rate": 4.6515837104072405e-05, "loss": 0.3435, "step": 600 }, { "epoch": 0.508957654723127, "grad_norm": 1.7156363725662231, "learning_rate": 4.613876319758673e-05, "loss": 0.385, "step": 625 }, { "epoch": 0.5293159609120521, "grad_norm": 7.234278678894043, "learning_rate": 4.5761689291101056e-05, "loss": 0.2957, "step": 650 }, { "epoch": 0.5496742671009772, "grad_norm": 0.9336622357368469, "learning_rate": 4.538461538461539e-05, "loss": 0.3457, "step": 675 }, { "epoch": 0.5700325732899023, "grad_norm": 4.294078826904297, "learning_rate": 4.5007541478129713e-05, "loss": 0.2994, "step": 700 }, { "epoch": 0.5903908794788274, "grad_norm": 1.4078471660614014, "learning_rate": 4.4630467571644046e-05, "loss": 0.2405, "step": 725 }, { "epoch": 0.6107491856677525, "grad_norm": 79.46843719482422, "learning_rate": 4.425339366515837e-05, "loss": 0.4218, "step": 750 }, { "epoch": 0.6311074918566775, "grad_norm": 6.792111873626709, "learning_rate": 4.3876319758672704e-05, "loss": 0.3484, "step": 775 }, { "epoch": 0.6514657980456026, "grad_norm": 3.4430620670318604, "learning_rate": 4.349924585218703e-05, "loss": 0.3367, "step": 800 }, { "epoch": 0.6718241042345277, "grad_norm": 4.360027313232422, "learning_rate": 4.312217194570136e-05, "loss": 0.3882, "step": 825 }, { "epoch": 0.6921824104234527, "grad_norm": 2.18359112739563, "learning_rate": 4.274509803921569e-05, "loss": 0.3104, "step": 850 }, { "epoch": 0.7125407166123778, "grad_norm": 3.8874869346618652, "learning_rate": 4.236802413273002e-05, "loss": 0.3761, "step": 875 }, { "epoch": 0.7328990228013029, "grad_norm": 3.5901646614074707, "learning_rate": 4.1990950226244345e-05, "loss": 0.2346, "step": 900 }, { "epoch": 0.753257328990228, "grad_norm": 7.589619159698486, "learning_rate": 4.161387631975868e-05, "loss": 0.3045, "step": 925 }, { "epoch": 0.7736156351791531, "grad_norm": 0.05337303876876831, "learning_rate": 4.1236802413273e-05, "loss": 0.2631, "step": 950 }, { "epoch": 0.7939739413680782, "grad_norm": 15.56165885925293, "learning_rate": 4.085972850678733e-05, "loss": 0.3127, "step": 975 }, { "epoch": 0.8143322475570033, "grad_norm": 4.630545139312744, "learning_rate": 4.048265460030166e-05, "loss": 0.2151, "step": 1000 }, { "epoch": 0.8346905537459284, "grad_norm": 4.379152774810791, "learning_rate": 4.010558069381599e-05, "loss": 0.2798, "step": 1025 }, { "epoch": 0.8550488599348535, "grad_norm": 2.1992580890655518, "learning_rate": 3.972850678733032e-05, "loss": 0.2251, "step": 1050 }, { "epoch": 0.8754071661237784, "grad_norm": 1.7602360248565674, "learning_rate": 3.9351432880844644e-05, "loss": 0.2194, "step": 1075 }, { "epoch": 0.8957654723127035, "grad_norm": 2.9637131690979004, "learning_rate": 3.8974358974358976e-05, "loss": 0.1697, "step": 1100 }, { "epoch": 0.9161237785016286, "grad_norm": 17.360435485839844, "learning_rate": 3.859728506787331e-05, "loss": 0.2675, "step": 1125 }, { "epoch": 0.9364820846905537, "grad_norm": 3.023590326309204, "learning_rate": 3.8220211161387634e-05, "loss": 0.403, "step": 1150 }, { "epoch": 0.9568403908794788, "grad_norm": 3.1426753997802734, "learning_rate": 3.784313725490196e-05, "loss": 0.2922, "step": 1175 }, { "epoch": 0.9771986970684039, "grad_norm": 4.420876979827881, "learning_rate": 3.746606334841629e-05, "loss": 0.2266, "step": 1200 }, { "epoch": 0.997557003257329, "grad_norm": 0.16120629012584686, "learning_rate": 3.7088989441930624e-05, "loss": 0.2747, "step": 1225 }, { "epoch": 1.0, "eval_accuracy": 0.9390082476326239, "eval_f1_macro": 0.6049749974508253, "eval_f1_micro": 0.9390082476326239, "eval_f1_weighted": 0.9321492495410462, "eval_loss": 0.22838342189788818, "eval_precision_macro": 0.6498991097180811, "eval_precision_micro": 0.9390082476326239, "eval_precision_weighted": 0.9427061729093369, "eval_recall_macro": 0.6146281611274449, "eval_recall_micro": 0.9390082476326239, "eval_recall_weighted": 0.9390082476326239, "eval_runtime": 1735.5718, "eval_samples_per_second": 5.659, "eval_steps_per_second": 0.354, "step": 1228 }, { "epoch": 1.017915309446254, "grad_norm": 11.002546310424805, "learning_rate": 3.671191553544494e-05, "loss": 0.2739, "step": 1250 }, { "epoch": 1.038273615635179, "grad_norm": 8.473743438720703, "learning_rate": 3.6334841628959275e-05, "loss": 0.3107, "step": 1275 }, { "epoch": 1.0586319218241043, "grad_norm": 4.2348527908325195, "learning_rate": 3.595776772247361e-05, "loss": 0.1918, "step": 1300 }, { "epoch": 1.0789902280130292, "grad_norm": 4.186614513397217, "learning_rate": 3.558069381598793e-05, "loss": 0.2189, "step": 1325 }, { "epoch": 1.0993485342019544, "grad_norm": 0.14162343740463257, "learning_rate": 3.5203619909502266e-05, "loss": 0.296, "step": 1350 }, { "epoch": 1.1197068403908794, "grad_norm": 7.524978160858154, "learning_rate": 3.482654600301659e-05, "loss": 0.2466, "step": 1375 }, { "epoch": 1.1400651465798046, "grad_norm": 1.766981601715088, "learning_rate": 3.4449472096530923e-05, "loss": 0.2404, "step": 1400 }, { "epoch": 1.1604234527687296, "grad_norm": 0.12022794038057327, "learning_rate": 3.407239819004525e-05, "loss": 0.2415, "step": 1425 }, { "epoch": 1.1807817589576548, "grad_norm": 0.11807321012020111, "learning_rate": 3.369532428355958e-05, "loss": 0.1981, "step": 1450 }, { "epoch": 1.2011400651465798, "grad_norm": 0.05754471197724342, "learning_rate": 3.331825037707391e-05, "loss": 0.1551, "step": 1475 }, { "epoch": 1.221498371335505, "grad_norm": 9.95758056640625, "learning_rate": 3.294117647058824e-05, "loss": 0.3097, "step": 1500 }, { "epoch": 1.24185667752443, "grad_norm": 0.8136438131332397, "learning_rate": 3.2564102564102565e-05, "loss": 0.1911, "step": 1525 }, { "epoch": 1.262214983713355, "grad_norm": 0.6201236844062805, "learning_rate": 3.21870286576169e-05, "loss": 0.0893, "step": 1550 }, { "epoch": 1.2825732899022801, "grad_norm": 0.15112124383449554, "learning_rate": 3.180995475113122e-05, "loss": 0.2805, "step": 1575 }, { "epoch": 1.3029315960912053, "grad_norm": 2.3411223888397217, "learning_rate": 3.143288084464555e-05, "loss": 0.1462, "step": 1600 }, { "epoch": 1.3232899022801303, "grad_norm": 0.041621431708335876, "learning_rate": 3.105580693815988e-05, "loss": 0.2018, "step": 1625 }, { "epoch": 1.3436482084690553, "grad_norm": 1.9320759773254395, "learning_rate": 3.067873303167421e-05, "loss": 0.2083, "step": 1650 }, { "epoch": 1.3640065146579805, "grad_norm": 0.14204758405685425, "learning_rate": 3.0301659125188535e-05, "loss": 0.0894, "step": 1675 }, { "epoch": 1.3843648208469055, "grad_norm": 0.18657904863357544, "learning_rate": 2.9924585218702867e-05, "loss": 0.249, "step": 1700 }, { "epoch": 1.4047231270358307, "grad_norm": 0.3584413528442383, "learning_rate": 2.9547511312217196e-05, "loss": 0.2378, "step": 1725 }, { "epoch": 1.4250814332247557, "grad_norm": 9.604140281677246, "learning_rate": 2.9170437405731525e-05, "loss": 0.2683, "step": 1750 }, { "epoch": 1.4454397394136809, "grad_norm": 0.7611903548240662, "learning_rate": 2.879336349924585e-05, "loss": 0.2773, "step": 1775 }, { "epoch": 1.4657980456026058, "grad_norm": 4.293066501617432, "learning_rate": 2.8416289592760183e-05, "loss": 0.323, "step": 1800 }, { "epoch": 1.4861563517915308, "grad_norm": 0.13425233960151672, "learning_rate": 2.8039215686274512e-05, "loss": 0.1535, "step": 1825 }, { "epoch": 1.506514657980456, "grad_norm": 0.22385647892951965, "learning_rate": 2.766214177978884e-05, "loss": 0.3187, "step": 1850 }, { "epoch": 1.5268729641693812, "grad_norm": 0.4442937970161438, "learning_rate": 2.7285067873303166e-05, "loss": 0.2076, "step": 1875 }, { "epoch": 1.5472312703583062, "grad_norm": 1.3991447687149048, "learning_rate": 2.6907993966817495e-05, "loss": 0.2729, "step": 1900 }, { "epoch": 1.5675895765472312, "grad_norm": 0.29188427329063416, "learning_rate": 2.6530920060331828e-05, "loss": 0.222, "step": 1925 }, { "epoch": 1.5879478827361564, "grad_norm": 0.29497218132019043, "learning_rate": 2.6153846153846157e-05, "loss": 0.264, "step": 1950 }, { "epoch": 1.6083061889250816, "grad_norm": 2.5838730335235596, "learning_rate": 2.5776772247360482e-05, "loss": 0.2403, "step": 1975 }, { "epoch": 1.6286644951140063, "grad_norm": 0.02832282893359661, "learning_rate": 2.539969834087481e-05, "loss": 0.2487, "step": 2000 }, { "epoch": 1.6490228013029316, "grad_norm": 0.12363941967487335, "learning_rate": 2.502262443438914e-05, "loss": 0.1515, "step": 2025 }, { "epoch": 1.6693811074918568, "grad_norm": 5.976169109344482, "learning_rate": 2.4645550527903472e-05, "loss": 0.2113, "step": 2050 }, { "epoch": 1.6897394136807817, "grad_norm": 1.9991737604141235, "learning_rate": 2.4268476621417798e-05, "loss": 0.1986, "step": 2075 }, { "epoch": 1.7100977198697067, "grad_norm": 0.8933628797531128, "learning_rate": 2.389140271493213e-05, "loss": 0.3856, "step": 2100 }, { "epoch": 1.730456026058632, "grad_norm": 23.50140953063965, "learning_rate": 2.3514328808446456e-05, "loss": 0.2153, "step": 2125 }, { "epoch": 1.7508143322475571, "grad_norm": 4.561667442321777, "learning_rate": 2.3137254901960788e-05, "loss": 0.2402, "step": 2150 }, { "epoch": 1.771172638436482, "grad_norm": 4.280697822570801, "learning_rate": 2.2760180995475113e-05, "loss": 0.1555, "step": 2175 }, { "epoch": 1.791530944625407, "grad_norm": 4.35147762298584, "learning_rate": 2.2383107088989442e-05, "loss": 0.1045, "step": 2200 }, { "epoch": 1.8118892508143323, "grad_norm": 0.05062297731637955, "learning_rate": 2.200603318250377e-05, "loss": 0.2187, "step": 2225 }, { "epoch": 1.8322475570032575, "grad_norm": 5.871191024780273, "learning_rate": 2.16289592760181e-05, "loss": 0.2897, "step": 2250 }, { "epoch": 1.8526058631921825, "grad_norm": 0.32290223240852356, "learning_rate": 2.125188536953243e-05, "loss": 0.2238, "step": 2275 }, { "epoch": 1.8729641693811074, "grad_norm": 4.226949691772461, "learning_rate": 2.0874811463046758e-05, "loss": 0.1555, "step": 2300 }, { "epoch": 1.8933224755700326, "grad_norm": 4.449354648590088, "learning_rate": 2.0497737556561087e-05, "loss": 0.1908, "step": 2325 }, { "epoch": 1.9136807817589576, "grad_norm": 2.021489381790161, "learning_rate": 2.0120663650075416e-05, "loss": 0.1242, "step": 2350 }, { "epoch": 1.9340390879478826, "grad_norm": 0.148870587348938, "learning_rate": 1.9743589743589745e-05, "loss": 0.2343, "step": 2375 }, { "epoch": 1.9543973941368078, "grad_norm": 0.22137416899204254, "learning_rate": 1.9366515837104074e-05, "loss": 0.2007, "step": 2400 }, { "epoch": 1.974755700325733, "grad_norm": 0.20752932131290436, "learning_rate": 1.8989441930618403e-05, "loss": 0.1767, "step": 2425 }, { "epoch": 1.995114006514658, "grad_norm": 0.3285848796367645, "learning_rate": 1.861236802413273e-05, "loss": 0.2494, "step": 2450 }, { "epoch": 2.0, "eval_accuracy": 0.9507178495061602, "eval_f1_macro": 0.629593020817204, "eval_f1_micro": 0.9507178495061602, "eval_f1_weighted": 0.9448585568165518, "eval_loss": 0.16687127947807312, "eval_precision_macro": 0.6521951210919578, "eval_precision_micro": 0.9507178495061602, "eval_precision_weighted": 0.9524468905040049, "eval_recall_macro": 0.6493699850001685, "eval_recall_micro": 0.9507178495061602, "eval_recall_weighted": 0.9507178495061602, "eval_runtime": 1767.0143, "eval_samples_per_second": 5.558, "eval_steps_per_second": 0.347, "step": 2456 }, { "epoch": 2.015472312703583, "grad_norm": 0.6184514164924622, "learning_rate": 1.8235294117647057e-05, "loss": 0.1294, "step": 2475 }, { "epoch": 2.035830618892508, "grad_norm": 0.06615210324525833, "learning_rate": 1.785822021116139e-05, "loss": 0.2455, "step": 2500 }, { "epoch": 2.0561889250814334, "grad_norm": 0.30965787172317505, "learning_rate": 1.7481146304675715e-05, "loss": 0.138, "step": 2525 }, { "epoch": 2.076547231270358, "grad_norm": 0.25489869713783264, "learning_rate": 1.7104072398190047e-05, "loss": 0.2043, "step": 2550 }, { "epoch": 2.0969055374592833, "grad_norm": 0.9780781865119934, "learning_rate": 1.6726998491704373e-05, "loss": 0.2884, "step": 2575 }, { "epoch": 2.1172638436482085, "grad_norm": 0.16642184555530548, "learning_rate": 1.6349924585218705e-05, "loss": 0.183, "step": 2600 }, { "epoch": 2.1376221498371337, "grad_norm": 6.976410388946533, "learning_rate": 1.597285067873303e-05, "loss": 0.297, "step": 2625 }, { "epoch": 2.1579804560260585, "grad_norm": 3.697134017944336, "learning_rate": 1.559577677224736e-05, "loss": 0.2494, "step": 2650 }, { "epoch": 2.1783387622149837, "grad_norm": 8.915691375732422, "learning_rate": 1.5218702865761689e-05, "loss": 0.1559, "step": 2675 }, { "epoch": 2.198697068403909, "grad_norm": 0.500405490398407, "learning_rate": 1.484162895927602e-05, "loss": 0.266, "step": 2700 }, { "epoch": 2.219055374592834, "grad_norm": 0.007504461333155632, "learning_rate": 1.4464555052790348e-05, "loss": 0.2264, "step": 2725 }, { "epoch": 2.239413680781759, "grad_norm": 8.733315467834473, "learning_rate": 1.4087481146304677e-05, "loss": 0.1124, "step": 2750 }, { "epoch": 2.259771986970684, "grad_norm": 0.24120819568634033, "learning_rate": 1.3710407239819006e-05, "loss": 0.1786, "step": 2775 }, { "epoch": 2.2801302931596092, "grad_norm": 0.3991820812225342, "learning_rate": 1.3333333333333333e-05, "loss": 0.1999, "step": 2800 }, { "epoch": 2.300488599348534, "grad_norm": 0.17051202058792114, "learning_rate": 1.2956259426847664e-05, "loss": 0.1106, "step": 2825 }, { "epoch": 2.320846905537459, "grad_norm": 0.31717777252197266, "learning_rate": 1.2579185520361991e-05, "loss": 0.1515, "step": 2850 }, { "epoch": 2.3412052117263844, "grad_norm": 0.21376913785934448, "learning_rate": 1.220211161387632e-05, "loss": 0.132, "step": 2875 }, { "epoch": 2.3615635179153096, "grad_norm": 11.429539680480957, "learning_rate": 1.1825037707390649e-05, "loss": 0.1684, "step": 2900 }, { "epoch": 2.3819218241042344, "grad_norm": 2.0131633281707764, "learning_rate": 1.1447963800904978e-05, "loss": 0.2076, "step": 2925 }, { "epoch": 2.4022801302931596, "grad_norm": 0.16979430615901947, "learning_rate": 1.1070889894419307e-05, "loss": 0.2068, "step": 2950 }, { "epoch": 2.4226384364820848, "grad_norm": 41.4106330871582, "learning_rate": 1.0693815987933636e-05, "loss": 0.2207, "step": 2975 }, { "epoch": 2.44299674267101, "grad_norm": 0.23001661896705627, "learning_rate": 1.0316742081447963e-05, "loss": 0.1316, "step": 3000 }, { "epoch": 2.4633550488599347, "grad_norm": 0.10534968227148056, "learning_rate": 9.939668174962292e-06, "loss": 0.1788, "step": 3025 }, { "epoch": 2.48371335504886, "grad_norm": 4.733824729919434, "learning_rate": 9.562594268476621e-06, "loss": 0.1504, "step": 3050 }, { "epoch": 2.504071661237785, "grad_norm": 0.3086649477481842, "learning_rate": 9.185520361990952e-06, "loss": 0.1202, "step": 3075 }, { "epoch": 2.52442996742671, "grad_norm": 0.25835877656936646, "learning_rate": 8.80844645550528e-06, "loss": 0.1652, "step": 3100 }, { "epoch": 2.544788273615635, "grad_norm": 0.08818981796503067, "learning_rate": 8.43137254901961e-06, "loss": 0.1482, "step": 3125 }, { "epoch": 2.5651465798045603, "grad_norm": 0.11015627533197403, "learning_rate": 8.054298642533938e-06, "loss": 0.1165, "step": 3150 }, { "epoch": 2.5855048859934855, "grad_norm": 0.042187802493572235, "learning_rate": 7.677224736048266e-06, "loss": 0.176, "step": 3175 }, { "epoch": 2.6058631921824107, "grad_norm": 0.22007593512535095, "learning_rate": 7.300150829562595e-06, "loss": 0.1443, "step": 3200 }, { "epoch": 2.6262214983713354, "grad_norm": 2.1402783393859863, "learning_rate": 6.923076923076923e-06, "loss": 0.1073, "step": 3225 }, { "epoch": 2.6465798045602607, "grad_norm": 0.3307274580001831, "learning_rate": 6.546003016591252e-06, "loss": 0.045, "step": 3250 }, { "epoch": 2.666938110749186, "grad_norm": 0.30274340510368347, "learning_rate": 6.168929110105581e-06, "loss": 0.2391, "step": 3275 }, { "epoch": 2.6872964169381106, "grad_norm": 1.1078681945800781, "learning_rate": 5.79185520361991e-06, "loss": 0.1484, "step": 3300 }, { "epoch": 2.707654723127036, "grad_norm": 0.587151050567627, "learning_rate": 5.414781297134239e-06, "loss": 0.1787, "step": 3325 }, { "epoch": 2.728013029315961, "grad_norm": 6.527975082397461, "learning_rate": 5.037707390648567e-06, "loss": 0.1281, "step": 3350 }, { "epoch": 2.7483713355048858, "grad_norm": 0.0781206339597702, "learning_rate": 4.660633484162896e-06, "loss": 0.1965, "step": 3375 }, { "epoch": 2.768729641693811, "grad_norm": 2.1731104850769043, "learning_rate": 4.283559577677225e-06, "loss": 0.2292, "step": 3400 }, { "epoch": 2.789087947882736, "grad_norm": 2.2396810054779053, "learning_rate": 3.906485671191554e-06, "loss": 0.1857, "step": 3425 }, { "epoch": 2.8094462540716614, "grad_norm": 3.685072422027588, "learning_rate": 3.5294117647058825e-06, "loss": 0.3303, "step": 3450 }, { "epoch": 2.8298045602605866, "grad_norm": 1.8913475275039673, "learning_rate": 3.1523378582202114e-06, "loss": 0.041, "step": 3475 }, { "epoch": 2.8501628664495113, "grad_norm": 5.620538711547852, "learning_rate": 2.77526395173454e-06, "loss": 0.2987, "step": 3500 }, { "epoch": 2.8705211726384365, "grad_norm": 24.464252471923828, "learning_rate": 2.3981900452488693e-06, "loss": 0.15, "step": 3525 }, { "epoch": 2.8908794788273617, "grad_norm": 0.011870063841342926, "learning_rate": 2.0211161387631978e-06, "loss": 0.1082, "step": 3550 }, { "epoch": 2.9112377850162865, "grad_norm": 0.16878418624401093, "learning_rate": 1.6440422322775265e-06, "loss": 0.0885, "step": 3575 }, { "epoch": 2.9315960912052117, "grad_norm": 1.7386406660079956, "learning_rate": 1.2669683257918552e-06, "loss": 0.1607, "step": 3600 }, { "epoch": 2.951954397394137, "grad_norm": 2.244051694869995, "learning_rate": 8.89894419306184e-07, "loss": 0.2452, "step": 3625 }, { "epoch": 2.9723127035830617, "grad_norm": 0.0863373726606369, "learning_rate": 5.128205128205128e-07, "loss": 0.2171, "step": 3650 }, { "epoch": 2.992671009771987, "grad_norm": 0.1831163614988327, "learning_rate": 1.3574660633484163e-07, "loss": 0.0981, "step": 3675 }, { "epoch": 3.0, "eval_accuracy": 0.9552998676305875, "eval_f1_macro": 0.6361712383091356, "eval_f1_micro": 0.9552998676305875, "eval_f1_weighted": 0.9495200818880537, "eval_loss": 0.14370010793209076, "eval_precision_macro": 0.6597709606900284, "eval_precision_micro": 0.9552998676305875, "eval_precision_weighted": 0.9565480825058283, "eval_recall_macro": 0.6541047025742054, "eval_recall_micro": 0.9552998676305875, "eval_recall_weighted": 0.9552998676305875, "eval_runtime": 1742.936, "eval_samples_per_second": 5.635, "eval_steps_per_second": 0.352, "step": 3684 } ], "logging_steps": 25, "max_steps": 3684, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1938427870708224.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }