{ "best_metric": 0.0027391575276851654, "best_model_checkpoint": "sj-det-6/checkpoint-9162", "epoch": 3.0, "eval_steps": 500, "global_step": 9162, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008185985592665358, "grad_norm": 14.986274719238281, "learning_rate": 1.2540894220283533e-06, "loss": 1.1682, "step": 25 }, { "epoch": 0.016371971185330715, "grad_norm": 8.189007759094238, "learning_rate": 2.6172300981461287e-06, "loss": 0.7882, "step": 50 }, { "epoch": 0.02455795677799607, "grad_norm": 4.545085906982422, "learning_rate": 3.980370774263904e-06, "loss": 0.3987, "step": 75 }, { "epoch": 0.03274394237066143, "grad_norm": 0.8907670974731445, "learning_rate": 5.343511450381679e-06, "loss": 0.1198, "step": 100 }, { "epoch": 0.040929927963326784, "grad_norm": 0.8682488203048706, "learning_rate": 6.7066521264994545e-06, "loss": 0.1166, "step": 125 }, { "epoch": 0.04911591355599214, "grad_norm": 0.1406942456960678, "learning_rate": 8.069792802617231e-06, "loss": 0.0797, "step": 150 }, { "epoch": 0.0573018991486575, "grad_norm": 0.030166020616889, "learning_rate": 9.432933478735007e-06, "loss": 0.0518, "step": 175 }, { "epoch": 0.06548788474132286, "grad_norm": 0.0494314506649971, "learning_rate": 1.0796074154852782e-05, "loss": 0.0505, "step": 200 }, { "epoch": 0.07367387033398821, "grad_norm": 0.01392674632370472, "learning_rate": 1.2159214830970557e-05, "loss": 0.0272, "step": 225 }, { "epoch": 0.08185985592665357, "grad_norm": 0.0814874917268753, "learning_rate": 1.3522355507088334e-05, "loss": 0.0365, "step": 250 }, { "epoch": 0.09004584151931892, "grad_norm": 0.04325518757104874, "learning_rate": 1.4885496183206107e-05, "loss": 0.0576, "step": 275 }, { "epoch": 0.09823182711198428, "grad_norm": 0.09735051542520523, "learning_rate": 1.624863685932388e-05, "loss": 0.0329, "step": 300 }, { "epoch": 0.10641781270464964, "grad_norm": 0.64588862657547, "learning_rate": 1.761177753544166e-05, "loss": 0.0802, "step": 325 }, { "epoch": 0.114603798297315, "grad_norm": 0.011760449036955833, "learning_rate": 1.8974918211559434e-05, "loss": 0.0524, "step": 350 }, { "epoch": 0.12278978388998035, "grad_norm": 0.1449550837278366, "learning_rate": 2.033805888767721e-05, "loss": 0.0347, "step": 375 }, { "epoch": 0.13097576948264572, "grad_norm": 0.018402298912405968, "learning_rate": 2.1701199563794985e-05, "loss": 0.0155, "step": 400 }, { "epoch": 0.13916175507531106, "grad_norm": 0.016214437782764435, "learning_rate": 2.306434023991276e-05, "loss": 0.0988, "step": 425 }, { "epoch": 0.14734774066797643, "grad_norm": 0.08800540119409561, "learning_rate": 2.4427480916030535e-05, "loss": 0.0428, "step": 450 }, { "epoch": 0.15553372626064177, "grad_norm": 0.005783313885331154, "learning_rate": 2.5790621592148313e-05, "loss": 0.1005, "step": 475 }, { "epoch": 0.16371971185330714, "grad_norm": 0.028863191604614258, "learning_rate": 2.7153762268266085e-05, "loss": 0.0427, "step": 500 }, { "epoch": 0.1719056974459725, "grad_norm": 6.242043495178223, "learning_rate": 2.851690294438386e-05, "loss": 0.0868, "step": 525 }, { "epoch": 0.18009168303863785, "grad_norm": 0.013895904645323753, "learning_rate": 2.9880043620501635e-05, "loss": 0.0898, "step": 550 }, { "epoch": 0.1882776686313032, "grad_norm": 0.019419686868786812, "learning_rate": 3.124318429661941e-05, "loss": 0.0351, "step": 575 }, { "epoch": 0.19646365422396855, "grad_norm": 0.008130255155265331, "learning_rate": 3.260632497273719e-05, "loss": 0.0164, "step": 600 }, { "epoch": 0.20464963981663392, "grad_norm": 6.5059814453125, "learning_rate": 3.3969465648854964e-05, "loss": 0.0359, "step": 625 }, { "epoch": 0.2128356254092993, "grad_norm": 0.010089697316288948, "learning_rate": 3.533260632497274e-05, "loss": 0.0305, "step": 650 }, { "epoch": 0.22102161100196463, "grad_norm": 0.014347407966852188, "learning_rate": 3.6695747001090515e-05, "loss": 0.0435, "step": 675 }, { "epoch": 0.22920759659463, "grad_norm": 0.017506686970591545, "learning_rate": 3.805888767720829e-05, "loss": 0.0486, "step": 700 }, { "epoch": 0.23739358218729534, "grad_norm": 0.004301242996007204, "learning_rate": 3.9422028353326065e-05, "loss": 0.0002, "step": 725 }, { "epoch": 0.2455795677799607, "grad_norm": 0.00440743425861001, "learning_rate": 4.078516902944384e-05, "loss": 0.0134, "step": 750 }, { "epoch": 0.2537655533726261, "grad_norm": 0.037835538387298584, "learning_rate": 4.2148309705561615e-05, "loss": 0.0334, "step": 775 }, { "epoch": 0.26195153896529144, "grad_norm": 0.11478447914123535, "learning_rate": 4.351145038167939e-05, "loss": 0.0665, "step": 800 }, { "epoch": 0.27013752455795675, "grad_norm": 0.006648090668022633, "learning_rate": 4.4874591057797165e-05, "loss": 0.0033, "step": 825 }, { "epoch": 0.2783235101506221, "grad_norm": 8.948714256286621, "learning_rate": 4.623773173391494e-05, "loss": 0.0996, "step": 850 }, { "epoch": 0.2865094957432875, "grad_norm": 0.012912426143884659, "learning_rate": 4.7600872410032716e-05, "loss": 0.0102, "step": 875 }, { "epoch": 0.29469548133595286, "grad_norm": 0.0026352498680353165, "learning_rate": 4.89640130861505e-05, "loss": 0.0226, "step": 900 }, { "epoch": 0.3028814669286182, "grad_norm": 0.15103408694267273, "learning_rate": 4.996361431170407e-05, "loss": 0.0225, "step": 925 }, { "epoch": 0.31106745252128354, "grad_norm": 0.04878251254558563, "learning_rate": 4.981200727713766e-05, "loss": 0.0005, "step": 950 }, { "epoch": 0.3192534381139489, "grad_norm": 0.00841938890516758, "learning_rate": 4.966040024257126e-05, "loss": 0.0513, "step": 975 }, { "epoch": 0.3274394237066143, "grad_norm": 0.04339870065450668, "learning_rate": 4.9508793208004854e-05, "loss": 0.0328, "step": 1000 }, { "epoch": 0.33562540929927964, "grad_norm": 0.00045626997598446906, "learning_rate": 4.935718617343845e-05, "loss": 0.0124, "step": 1025 }, { "epoch": 0.343811394891945, "grad_norm": 0.00013843162741977721, "learning_rate": 4.9205579138872046e-05, "loss": 0.0, "step": 1050 }, { "epoch": 0.3519973804846103, "grad_norm": 1.9331814050674438, "learning_rate": 4.905397210430564e-05, "loss": 0.0504, "step": 1075 }, { "epoch": 0.3601833660772757, "grad_norm": 0.002882858505472541, "learning_rate": 4.890236506973924e-05, "loss": 0.1157, "step": 1100 }, { "epoch": 0.36836935166994106, "grad_norm": 0.022938862442970276, "learning_rate": 4.875075803517283e-05, "loss": 0.0716, "step": 1125 }, { "epoch": 0.3765553372626064, "grad_norm": 0.0010779292788356543, "learning_rate": 4.859915100060643e-05, "loss": 0.0419, "step": 1150 }, { "epoch": 0.3847413228552718, "grad_norm": 0.03177574649453163, "learning_rate": 4.8447543966040024e-05, "loss": 0.0249, "step": 1175 }, { "epoch": 0.3929273084479371, "grad_norm": 0.0006776170921511948, "learning_rate": 4.8295936931473626e-05, "loss": 0.0415, "step": 1200 }, { "epoch": 0.4011132940406025, "grad_norm": 0.00865075271576643, "learning_rate": 4.814432989690722e-05, "loss": 0.0304, "step": 1225 }, { "epoch": 0.40929927963326784, "grad_norm": 0.004142033401876688, "learning_rate": 4.799272286234082e-05, "loss": 0.066, "step": 1250 }, { "epoch": 0.4174852652259332, "grad_norm": 0.006911768112331629, "learning_rate": 4.784111582777441e-05, "loss": 0.0345, "step": 1275 }, { "epoch": 0.4256712508185986, "grad_norm": 0.011829985305666924, "learning_rate": 4.768950879320801e-05, "loss": 0.0324, "step": 1300 }, { "epoch": 0.4338572364112639, "grad_norm": 0.024410191923379898, "learning_rate": 4.7537901758641604e-05, "loss": 0.0412, "step": 1325 }, { "epoch": 0.44204322200392926, "grad_norm": 0.0020343943033367395, "learning_rate": 4.73862947240752e-05, "loss": 0.0133, "step": 1350 }, { "epoch": 0.4502292075965946, "grad_norm": 0.0009943109471350908, "learning_rate": 4.7234687689508795e-05, "loss": 0.0004, "step": 1375 }, { "epoch": 0.45841519318926, "grad_norm": 0.00024950483930297196, "learning_rate": 4.708308065494239e-05, "loss": 0.0, "step": 1400 }, { "epoch": 0.46660117878192536, "grad_norm": 0.0015270832227542996, "learning_rate": 4.6931473620375986e-05, "loss": 0.0314, "step": 1425 }, { "epoch": 0.4747871643745907, "grad_norm": 9.285660743713379, "learning_rate": 4.677986658580958e-05, "loss": 0.0955, "step": 1450 }, { "epoch": 0.48297314996725604, "grad_norm": 0.004155229777097702, "learning_rate": 4.6628259551243184e-05, "loss": 0.003, "step": 1475 }, { "epoch": 0.4911591355599214, "grad_norm": 0.013534590601921082, "learning_rate": 4.647665251667678e-05, "loss": 0.0269, "step": 1500 }, { "epoch": 0.4993451211525868, "grad_norm": 0.004476598929613829, "learning_rate": 4.6325045482110375e-05, "loss": 0.0011, "step": 1525 }, { "epoch": 0.5075311067452521, "grad_norm": 0.0038523597177118063, "learning_rate": 4.617343844754397e-05, "loss": 0.0015, "step": 1550 }, { "epoch": 0.5157170923379175, "grad_norm": 0.03839994966983795, "learning_rate": 4.602183141297756e-05, "loss": 0.1135, "step": 1575 }, { "epoch": 0.5239030779305829, "grad_norm": 0.000390953995520249, "learning_rate": 4.587022437841116e-05, "loss": 0.0015, "step": 1600 }, { "epoch": 0.5320890635232483, "grad_norm": 0.00021860921697225422, "learning_rate": 4.571861734384476e-05, "loss": 0.0015, "step": 1625 }, { "epoch": 0.5402750491159135, "grad_norm": 0.005596529692411423, "learning_rate": 4.556701030927835e-05, "loss": 0.1111, "step": 1650 }, { "epoch": 0.5484610347085789, "grad_norm": 5.192307949066162, "learning_rate": 4.541540327471195e-05, "loss": 0.056, "step": 1675 }, { "epoch": 0.5566470203012442, "grad_norm": 0.004266222473233938, "learning_rate": 4.5263796240145544e-05, "loss": 0.0078, "step": 1700 }, { "epoch": 0.5648330058939096, "grad_norm": 4.940586090087891, "learning_rate": 4.511218920557914e-05, "loss": 0.0416, "step": 1725 }, { "epoch": 0.573018991486575, "grad_norm": 0.056938640773296356, "learning_rate": 4.4960582171012735e-05, "loss": 0.0299, "step": 1750 }, { "epoch": 0.5812049770792403, "grad_norm": 0.025186289101839066, "learning_rate": 4.480897513644634e-05, "loss": 0.0046, "step": 1775 }, { "epoch": 0.5893909626719057, "grad_norm": 0.0009007578482851386, "learning_rate": 4.465736810187993e-05, "loss": 0.0402, "step": 1800 }, { "epoch": 0.5975769482645711, "grad_norm": 0.006263722199946642, "learning_rate": 4.450576106731352e-05, "loss": 0.0008, "step": 1825 }, { "epoch": 0.6057629338572365, "grad_norm": 0.002118312055245042, "learning_rate": 4.435415403274712e-05, "loss": 0.0003, "step": 1850 }, { "epoch": 0.6139489194499018, "grad_norm": 0.011466097086668015, "learning_rate": 4.420254699818071e-05, "loss": 0.0025, "step": 1875 }, { "epoch": 0.6221349050425671, "grad_norm": 0.27655062079429626, "learning_rate": 4.4050939963614315e-05, "loss": 0.0694, "step": 1900 }, { "epoch": 0.6303208906352324, "grad_norm": 0.011102208867669106, "learning_rate": 4.389933292904791e-05, "loss": 0.0771, "step": 1925 }, { "epoch": 0.6385068762278978, "grad_norm": 0.03391822427511215, "learning_rate": 4.3747725894481507e-05, "loss": 0.0516, "step": 1950 }, { "epoch": 0.6466928618205632, "grad_norm": 1.2409979104995728, "learning_rate": 4.35961188599151e-05, "loss": 0.0052, "step": 1975 }, { "epoch": 0.6548788474132285, "grad_norm": 0.005223038140684366, "learning_rate": 4.34445118253487e-05, "loss": 0.0265, "step": 2000 }, { "epoch": 0.6630648330058939, "grad_norm": 0.015766901895403862, "learning_rate": 4.329290479078229e-05, "loss": 0.0023, "step": 2025 }, { "epoch": 0.6712508185985593, "grad_norm": 0.030491165816783905, "learning_rate": 4.3141297756215896e-05, "loss": 0.0343, "step": 2050 }, { "epoch": 0.6794368041912247, "grad_norm": 0.00046129990369081497, "learning_rate": 4.298969072164949e-05, "loss": 0.0121, "step": 2075 }, { "epoch": 0.68762278978389, "grad_norm": 0.0001839818141888827, "learning_rate": 4.283808368708308e-05, "loss": 0.0012, "step": 2100 }, { "epoch": 0.6958087753765554, "grad_norm": 0.0003591451095417142, "learning_rate": 4.2686476652516676e-05, "loss": 0.0374, "step": 2125 }, { "epoch": 0.7039947609692206, "grad_norm": 0.009124216623604298, "learning_rate": 4.253486961795027e-05, "loss": 0.0082, "step": 2150 }, { "epoch": 0.712180746561886, "grad_norm": 0.0014978774124756455, "learning_rate": 4.2383262583383874e-05, "loss": 0.0306, "step": 2175 }, { "epoch": 0.7203667321545514, "grad_norm": 1.0534560680389404, "learning_rate": 4.223165554881747e-05, "loss": 0.0008, "step": 2200 }, { "epoch": 0.7285527177472167, "grad_norm": 0.005840160418301821, "learning_rate": 4.2080048514251065e-05, "loss": 0.0562, "step": 2225 }, { "epoch": 0.7367387033398821, "grad_norm": 4.12369966506958, "learning_rate": 4.192844147968466e-05, "loss": 0.01, "step": 2250 }, { "epoch": 0.7449246889325475, "grad_norm": 0.0006641410291194916, "learning_rate": 4.1776834445118256e-05, "loss": 0.0001, "step": 2275 }, { "epoch": 0.7531106745252129, "grad_norm": 0.0009534868295304477, "learning_rate": 4.162522741055185e-05, "loss": 0.0854, "step": 2300 }, { "epoch": 0.7612966601178782, "grad_norm": 0.02312465012073517, "learning_rate": 4.147362037598545e-05, "loss": 0.0523, "step": 2325 }, { "epoch": 0.7694826457105436, "grad_norm": 0.009137257933616638, "learning_rate": 4.132201334141904e-05, "loss": 0.0337, "step": 2350 }, { "epoch": 0.777668631303209, "grad_norm": 0.01361013948917389, "learning_rate": 4.117040630685264e-05, "loss": 0.0228, "step": 2375 }, { "epoch": 0.7858546168958742, "grad_norm": 0.0018830469343811274, "learning_rate": 4.1018799272286234e-05, "loss": 0.0129, "step": 2400 }, { "epoch": 0.7940406024885396, "grad_norm": 0.6149205565452576, "learning_rate": 4.086719223771983e-05, "loss": 0.0404, "step": 2425 }, { "epoch": 0.802226588081205, "grad_norm": 0.005154294427484274, "learning_rate": 4.0715585203153425e-05, "loss": 0.0011, "step": 2450 }, { "epoch": 0.8104125736738703, "grad_norm": 0.022492127493023872, "learning_rate": 4.056397816858703e-05, "loss": 0.042, "step": 2475 }, { "epoch": 0.8185985592665357, "grad_norm": 0.020115699619054794, "learning_rate": 4.041237113402062e-05, "loss": 0.0495, "step": 2500 }, { "epoch": 0.826784544859201, "grad_norm": 0.008540102280676365, "learning_rate": 4.026076409945422e-05, "loss": 0.0074, "step": 2525 }, { "epoch": 0.8349705304518664, "grad_norm": 0.00022775791876483709, "learning_rate": 4.0109157064887814e-05, "loss": 0.0001, "step": 2550 }, { "epoch": 0.8431565160445318, "grad_norm": 0.0014885278651490808, "learning_rate": 3.995755003032141e-05, "loss": 0.0207, "step": 2575 }, { "epoch": 0.8513425016371972, "grad_norm": 0.0014814437599852681, "learning_rate": 3.9805942995755005e-05, "loss": 0.0219, "step": 2600 }, { "epoch": 0.8595284872298625, "grad_norm": 0.0025843679904937744, "learning_rate": 3.96543359611886e-05, "loss": 0.0137, "step": 2625 }, { "epoch": 0.8677144728225278, "grad_norm": 0.04107142984867096, "learning_rate": 3.9502728926622196e-05, "loss": 0.0695, "step": 2650 }, { "epoch": 0.8759004584151932, "grad_norm": 0.005535195115953684, "learning_rate": 3.935112189205579e-05, "loss": 0.0157, "step": 2675 }, { "epoch": 0.8840864440078585, "grad_norm": 5.580929756164551, "learning_rate": 3.919951485748939e-05, "loss": 0.0806, "step": 2700 }, { "epoch": 0.8922724296005239, "grad_norm": 0.0086448909714818, "learning_rate": 3.904790782292298e-05, "loss": 0.046, "step": 2725 }, { "epoch": 0.9004584151931893, "grad_norm": 0.0019950985442847013, "learning_rate": 3.8896300788356585e-05, "loss": 0.0004, "step": 2750 }, { "epoch": 0.9086444007858546, "grad_norm": 0.04631026089191437, "learning_rate": 3.874469375379018e-05, "loss": 0.1026, "step": 2775 }, { "epoch": 0.91683038637852, "grad_norm": 0.14316339790821075, "learning_rate": 3.8593086719223776e-05, "loss": 0.0173, "step": 2800 }, { "epoch": 0.9250163719711854, "grad_norm": 0.002735959365963936, "learning_rate": 3.844147968465737e-05, "loss": 0.0102, "step": 2825 }, { "epoch": 0.9332023575638507, "grad_norm": 0.007864673621952534, "learning_rate": 3.828987265009097e-05, "loss": 0.001, "step": 2850 }, { "epoch": 0.9413883431565161, "grad_norm": 0.0006219564820639789, "learning_rate": 3.813826561552456e-05, "loss": 0.0121, "step": 2875 }, { "epoch": 0.9495743287491814, "grad_norm": 0.20409245789051056, "learning_rate": 3.798665858095816e-05, "loss": 0.0383, "step": 2900 }, { "epoch": 0.9577603143418467, "grad_norm": 0.0005641476600430906, "learning_rate": 3.7835051546391754e-05, "loss": 0.0054, "step": 2925 }, { "epoch": 0.9659462999345121, "grad_norm": 0.004506452474743128, "learning_rate": 3.768344451182535e-05, "loss": 0.0167, "step": 2950 }, { "epoch": 0.9741322855271775, "grad_norm": 0.0007494801538996398, "learning_rate": 3.7531837477258945e-05, "loss": 0.0016, "step": 2975 }, { "epoch": 0.9823182711198428, "grad_norm": 0.0016925218515098095, "learning_rate": 3.738023044269254e-05, "loss": 0.0144, "step": 3000 }, { "epoch": 0.9905042567125082, "grad_norm": 0.007691397797316313, "learning_rate": 3.7228623408126137e-05, "loss": 0.0264, "step": 3025 }, { "epoch": 0.9986902423051736, "grad_norm": 0.004836967680603266, "learning_rate": 3.707701637355974e-05, "loss": 0.0003, "step": 3050 }, { "epoch": 1.0, "eval_accuracy": 0.9955795677799607, "eval_auc": 0.9977481210079818, "eval_f1": 0.8187919463087249, "eval_loss": 0.014572993852198124, "eval_precision": 0.8133333333333334, "eval_recall": 0.8243243243243243, "eval_runtime": 74.4335, "eval_samples_per_second": 82.06, "eval_steps_per_second": 5.132, "step": 3054 }, { "epoch": 1.006876227897839, "grad_norm": 0.0026801538188010454, "learning_rate": 3.6925409338993335e-05, "loss": 0.0003, "step": 3075 }, { "epoch": 1.0150622134905043, "grad_norm": 0.0013839630410075188, "learning_rate": 3.677380230442693e-05, "loss": 0.0415, "step": 3100 }, { "epoch": 1.0232481990831697, "grad_norm": 0.0052743577398359776, "learning_rate": 3.662219526986052e-05, "loss": 0.0551, "step": 3125 }, { "epoch": 1.031434184675835, "grad_norm": 0.001981929875910282, "learning_rate": 3.6470588235294114e-05, "loss": 0.0011, "step": 3150 }, { "epoch": 1.0396201702685004, "grad_norm": 0.005122967530041933, "learning_rate": 3.631898120072772e-05, "loss": 0.0013, "step": 3175 }, { "epoch": 1.0478061558611658, "grad_norm": 0.0008022825350053608, "learning_rate": 3.616737416616131e-05, "loss": 0.0001, "step": 3200 }, { "epoch": 1.0559921414538311, "grad_norm": 0.002690242137759924, "learning_rate": 3.601576713159491e-05, "loss": 0.0, "step": 3225 }, { "epoch": 1.0641781270464965, "grad_norm": 0.0003424021415412426, "learning_rate": 3.5864160097028504e-05, "loss": 0.0066, "step": 3250 }, { "epoch": 1.0723641126391619, "grad_norm": 0.0005132851074449718, "learning_rate": 3.57125530624621e-05, "loss": 0.0145, "step": 3275 }, { "epoch": 1.080550098231827, "grad_norm": 0.0189173873513937, "learning_rate": 3.5560946027895695e-05, "loss": 0.0465, "step": 3300 }, { "epoch": 1.0887360838244924, "grad_norm": 0.0036357955541461706, "learning_rate": 3.54093389933293e-05, "loss": 0.0492, "step": 3325 }, { "epoch": 1.0969220694171578, "grad_norm": 0.003961147274821997, "learning_rate": 3.525773195876289e-05, "loss": 0.039, "step": 3350 }, { "epoch": 1.1051080550098231, "grad_norm": 0.003041486954316497, "learning_rate": 3.510612492419648e-05, "loss": 0.0123, "step": 3375 }, { "epoch": 1.1132940406024885, "grad_norm": 0.001711357617750764, "learning_rate": 3.495451788963008e-05, "loss": 0.0003, "step": 3400 }, { "epoch": 1.1214800261951539, "grad_norm": 0.0013232220662757754, "learning_rate": 3.480291085506367e-05, "loss": 0.0096, "step": 3425 }, { "epoch": 1.1296660117878192, "grad_norm": 0.000497565430123359, "learning_rate": 3.4651303820497275e-05, "loss": 0.0233, "step": 3450 }, { "epoch": 1.1378519973804846, "grad_norm": 0.005278216674923897, "learning_rate": 3.449969678593087e-05, "loss": 0.0399, "step": 3475 }, { "epoch": 1.14603798297315, "grad_norm": 0.3594399392604828, "learning_rate": 3.4348089751364466e-05, "loss": 0.068, "step": 3500 }, { "epoch": 1.1542239685658153, "grad_norm": 0.013597418554127216, "learning_rate": 3.419648271679806e-05, "loss": 0.0018, "step": 3525 }, { "epoch": 1.1624099541584807, "grad_norm": 0.004102836363017559, "learning_rate": 3.404487568223166e-05, "loss": 0.0292, "step": 3550 }, { "epoch": 1.170595939751146, "grad_norm": 0.0014996347017586231, "learning_rate": 3.389326864766525e-05, "loss": 0.0114, "step": 3575 }, { "epoch": 1.1787819253438114, "grad_norm": 0.018868234008550644, "learning_rate": 3.374166161309885e-05, "loss": 0.0169, "step": 3600 }, { "epoch": 1.1869679109364768, "grad_norm": 0.001259682234376669, "learning_rate": 3.3590054578532444e-05, "loss": 0.0042, "step": 3625 }, { "epoch": 1.1951538965291422, "grad_norm": 0.15819938480854034, "learning_rate": 3.343844754396604e-05, "loss": 0.0007, "step": 3650 }, { "epoch": 1.2033398821218075, "grad_norm": 0.0013165829004719853, "learning_rate": 3.3286840509399635e-05, "loss": 0.0357, "step": 3675 }, { "epoch": 1.211525867714473, "grad_norm": 0.0017084279097616673, "learning_rate": 3.313523347483323e-05, "loss": 0.0034, "step": 3700 }, { "epoch": 1.2197118533071383, "grad_norm": 0.0006795689114369452, "learning_rate": 3.2983626440266826e-05, "loss": 0.0004, "step": 3725 }, { "epoch": 1.2278978388998034, "grad_norm": 0.0009630739223212004, "learning_rate": 3.283201940570043e-05, "loss": 0.0133, "step": 3750 }, { "epoch": 1.2360838244924688, "grad_norm": 0.0106749152764678, "learning_rate": 3.2680412371134024e-05, "loss": 0.0173, "step": 3775 }, { "epoch": 1.2442698100851342, "grad_norm": 0.001351616345345974, "learning_rate": 3.252880533656762e-05, "loss": 0.0023, "step": 3800 }, { "epoch": 1.2524557956777995, "grad_norm": 0.0006208529230207205, "learning_rate": 3.2377198302001215e-05, "loss": 0.0, "step": 3825 }, { "epoch": 1.260641781270465, "grad_norm": 0.0005773603334091604, "learning_rate": 3.222559126743481e-05, "loss": 0.0373, "step": 3850 }, { "epoch": 1.2688277668631303, "grad_norm": 0.0014631077647209167, "learning_rate": 3.2073984232868406e-05, "loss": 0.0025, "step": 3875 }, { "epoch": 1.2770137524557956, "grad_norm": 0.0002054146461887285, "learning_rate": 3.1922377198302e-05, "loss": 0.0013, "step": 3900 }, { "epoch": 1.285199738048461, "grad_norm": 0.0010797285940498114, "learning_rate": 3.17707701637356e-05, "loss": 0.0427, "step": 3925 }, { "epoch": 1.2933857236411264, "grad_norm": 0.022801030427217484, "learning_rate": 3.161916312916919e-05, "loss": 0.0297, "step": 3950 }, { "epoch": 1.3015717092337917, "grad_norm": 0.33411428332328796, "learning_rate": 3.146755609460279e-05, "loss": 0.0412, "step": 3975 }, { "epoch": 1.309757694826457, "grad_norm": 0.014158491976559162, "learning_rate": 3.1315949060036384e-05, "loss": 0.0185, "step": 4000 }, { "epoch": 1.3179436804191225, "grad_norm": 0.00048260248149745166, "learning_rate": 3.116434202546999e-05, "loss": 0.0, "step": 4025 }, { "epoch": 1.3261296660117878, "grad_norm": 0.19578149914741516, "learning_rate": 3.101273499090358e-05, "loss": 0.0772, "step": 4050 }, { "epoch": 1.3343156516044532, "grad_norm": 0.0038196328096091747, "learning_rate": 3.086112795633718e-05, "loss": 0.0031, "step": 4075 }, { "epoch": 1.3425016371971186, "grad_norm": 0.001698371721431613, "learning_rate": 3.070952092177077e-05, "loss": 0.054, "step": 4100 }, { "epoch": 1.350687622789784, "grad_norm": 0.0078005073592066765, "learning_rate": 3.055791388720437e-05, "loss": 0.0095, "step": 4125 }, { "epoch": 1.3588736083824493, "grad_norm": 0.0008572742226533592, "learning_rate": 3.040630685263796e-05, "loss": 0.0508, "step": 4150 }, { "epoch": 1.3670595939751147, "grad_norm": 0.011146394535899162, "learning_rate": 3.0254699818071557e-05, "loss": 0.0004, "step": 4175 }, { "epoch": 1.37524557956778, "grad_norm": 0.003408160526305437, "learning_rate": 3.0103092783505156e-05, "loss": 0.0431, "step": 4200 }, { "epoch": 1.3834315651604454, "grad_norm": 0.01908206194639206, "learning_rate": 2.995148574893875e-05, "loss": 0.0387, "step": 4225 }, { "epoch": 1.3916175507531108, "grad_norm": 0.001946812029927969, "learning_rate": 2.9799878714372347e-05, "loss": 0.0005, "step": 4250 }, { "epoch": 1.3998035363457761, "grad_norm": 0.008685130625963211, "learning_rate": 2.9648271679805946e-05, "loss": 0.0786, "step": 4275 }, { "epoch": 1.4079895219384415, "grad_norm": 0.07280147820711136, "learning_rate": 2.949666464523954e-05, "loss": 0.0217, "step": 4300 }, { "epoch": 1.4161755075311069, "grad_norm": 0.01758684776723385, "learning_rate": 2.9345057610673137e-05, "loss": 0.0042, "step": 4325 }, { "epoch": 1.424361493123772, "grad_norm": 0.003927039448171854, "learning_rate": 2.9193450576106736e-05, "loss": 0.0033, "step": 4350 }, { "epoch": 1.4325474787164374, "grad_norm": 0.003762061009183526, "learning_rate": 2.904184354154033e-05, "loss": 0.0313, "step": 4375 }, { "epoch": 1.4407334643091028, "grad_norm": 0.00642773462459445, "learning_rate": 2.8890236506973924e-05, "loss": 0.0003, "step": 4400 }, { "epoch": 1.4489194499017681, "grad_norm": 0.005841416772454977, "learning_rate": 2.873862947240752e-05, "loss": 0.0001, "step": 4425 }, { "epoch": 1.4571054354944335, "grad_norm": 0.0015138218877837062, "learning_rate": 2.8587022437841115e-05, "loss": 0.0001, "step": 4450 }, { "epoch": 1.4652914210870989, "grad_norm": 0.005790230352431536, "learning_rate": 2.8435415403274714e-05, "loss": 0.0166, "step": 4475 }, { "epoch": 1.4734774066797642, "grad_norm": 0.002549233613535762, "learning_rate": 2.828380836870831e-05, "loss": 0.0006, "step": 4500 }, { "epoch": 1.4816633922724296, "grad_norm": 0.0019311434589326382, "learning_rate": 2.8132201334141905e-05, "loss": 0.0094, "step": 4525 }, { "epoch": 1.489849377865095, "grad_norm": 0.0018175276927649975, "learning_rate": 2.7980594299575504e-05, "loss": 0.0179, "step": 4550 }, { "epoch": 1.4980353634577603, "grad_norm": 0.00037457983125932515, "learning_rate": 2.78289872650091e-05, "loss": 0.0, "step": 4575 }, { "epoch": 1.5062213490504257, "grad_norm": 0.0009425774333067238, "learning_rate": 2.7677380230442695e-05, "loss": 0.0, "step": 4600 }, { "epoch": 1.514407334643091, "grad_norm": 0.00077505485387519, "learning_rate": 2.752577319587629e-05, "loss": 0.0002, "step": 4625 }, { "epoch": 1.5225933202357562, "grad_norm": 0.00036527996417135, "learning_rate": 2.737416616130989e-05, "loss": 0.0002, "step": 4650 }, { "epoch": 1.5307793058284216, "grad_norm": 0.0006952973199076951, "learning_rate": 2.7222559126743478e-05, "loss": 0.0, "step": 4675 }, { "epoch": 1.538965291421087, "grad_norm": 0.0010176642099395394, "learning_rate": 2.7070952092177077e-05, "loss": 0.0465, "step": 4700 }, { "epoch": 1.5471512770137523, "grad_norm": 0.00043089999235235155, "learning_rate": 2.6919345057610673e-05, "loss": 0.013, "step": 4725 }, { "epoch": 1.5553372626064177, "grad_norm": 0.0003544854116626084, "learning_rate": 2.676773802304427e-05, "loss": 0.0001, "step": 4750 }, { "epoch": 1.563523248199083, "grad_norm": 0.0013717171968892217, "learning_rate": 2.6616130988477867e-05, "loss": 0.0002, "step": 4775 }, { "epoch": 1.5717092337917484, "grad_norm": 0.0011255746940150857, "learning_rate": 2.6464523953911463e-05, "loss": 0.0002, "step": 4800 }, { "epoch": 1.5798952193844138, "grad_norm": 0.0009519586456008255, "learning_rate": 2.631291691934506e-05, "loss": 0.0042, "step": 4825 }, { "epoch": 1.5880812049770792, "grad_norm": 0.0012405363377183676, "learning_rate": 2.6161309884778657e-05, "loss": 0.0, "step": 4850 }, { "epoch": 1.5962671905697445, "grad_norm": 0.0004118427459616214, "learning_rate": 2.6009702850212253e-05, "loss": 0.0515, "step": 4875 }, { "epoch": 1.60445317616241, "grad_norm": 0.0020054271444678307, "learning_rate": 2.585809581564585e-05, "loss": 0.0162, "step": 4900 }, { "epoch": 1.6126391617550753, "grad_norm": 0.0009532085387036204, "learning_rate": 2.570648878107944e-05, "loss": 0.0003, "step": 4925 }, { "epoch": 1.6208251473477406, "grad_norm": 0.009459923952817917, "learning_rate": 2.5554881746513036e-05, "loss": 0.0538, "step": 4950 }, { "epoch": 1.629011132940406, "grad_norm": 0.0009569909307174385, "learning_rate": 2.5403274711946635e-05, "loss": 0.0346, "step": 4975 }, { "epoch": 1.6371971185330714, "grad_norm": 0.0007560375961475074, "learning_rate": 2.525166767738023e-05, "loss": 0.0362, "step": 5000 }, { "epoch": 1.6453831041257367, "grad_norm": 0.0004632064083125442, "learning_rate": 2.5100060642813826e-05, "loss": 0.0005, "step": 5025 }, { "epoch": 1.653569089718402, "grad_norm": 0.0006956621073186398, "learning_rate": 2.4948453608247425e-05, "loss": 0.0028, "step": 5050 }, { "epoch": 1.6617550753110675, "grad_norm": 0.0004222158167976886, "learning_rate": 2.479684657368102e-05, "loss": 0.0002, "step": 5075 }, { "epoch": 1.6699410609037328, "grad_norm": 0.0006862828740850091, "learning_rate": 2.4645239539114617e-05, "loss": 0.0395, "step": 5100 }, { "epoch": 1.6781270464963982, "grad_norm": 0.001836837618611753, "learning_rate": 2.4493632504548212e-05, "loss": 0.0268, "step": 5125 }, { "epoch": 1.6863130320890636, "grad_norm": 0.0008472290937788785, "learning_rate": 2.4342025469981808e-05, "loss": 0.0028, "step": 5150 }, { "epoch": 1.694499017681729, "grad_norm": 0.0012132123811170459, "learning_rate": 2.4190418435415403e-05, "loss": 0.0001, "step": 5175 }, { "epoch": 1.7026850032743943, "grad_norm": 0.0005179749568924308, "learning_rate": 2.4038811400849002e-05, "loss": 0.0397, "step": 5200 }, { "epoch": 1.7108709888670597, "grad_norm": 0.0008750595734454691, "learning_rate": 2.3887204366282598e-05, "loss": 0.0002, "step": 5225 }, { "epoch": 1.719056974459725, "grad_norm": 0.02541407383978367, "learning_rate": 2.373559733171619e-05, "loss": 0.0001, "step": 5250 }, { "epoch": 1.7272429600523904, "grad_norm": 0.02840971015393734, "learning_rate": 2.358399029714979e-05, "loss": 0.0002, "step": 5275 }, { "epoch": 1.7354289456450558, "grad_norm": 0.0008489146712236106, "learning_rate": 2.3432383262583385e-05, "loss": 0.0142, "step": 5300 }, { "epoch": 1.7436149312377212, "grad_norm": 0.002642701379954815, "learning_rate": 2.328077622801698e-05, "loss": 0.0562, "step": 5325 }, { "epoch": 1.7518009168303865, "grad_norm": 0.004949661903083324, "learning_rate": 2.312916919345058e-05, "loss": 0.0147, "step": 5350 }, { "epoch": 1.759986902423052, "grad_norm": 0.11022938042879105, "learning_rate": 2.297756215888417e-05, "loss": 0.028, "step": 5375 }, { "epoch": 1.768172888015717, "grad_norm": 0.006674617063254118, "learning_rate": 2.282595512431777e-05, "loss": 0.0014, "step": 5400 }, { "epoch": 1.7763588736083824, "grad_norm": 0.0017073052003979683, "learning_rate": 2.2674348089751366e-05, "loss": 0.0028, "step": 5425 }, { "epoch": 1.7845448592010478, "grad_norm": 3.2796061038970947, "learning_rate": 2.252274105518496e-05, "loss": 0.0025, "step": 5450 }, { "epoch": 1.7927308447937131, "grad_norm": 0.0006569511606357992, "learning_rate": 2.237113402061856e-05, "loss": 0.0009, "step": 5475 }, { "epoch": 1.8009168303863785, "grad_norm": 0.00020327128004282713, "learning_rate": 2.2219526986052153e-05, "loss": 0.0, "step": 5500 }, { "epoch": 1.8091028159790439, "grad_norm": 0.041166484355926514, "learning_rate": 2.2067919951485748e-05, "loss": 0.0937, "step": 5525 }, { "epoch": 1.8172888015717092, "grad_norm": 0.05974828451871872, "learning_rate": 2.1916312916919347e-05, "loss": 0.0147, "step": 5550 }, { "epoch": 1.8254747871643746, "grad_norm": 0.004328828305006027, "learning_rate": 2.1764705882352943e-05, "loss": 0.0041, "step": 5575 }, { "epoch": 1.83366077275704, "grad_norm": 0.004382645711302757, "learning_rate": 2.1613098847786538e-05, "loss": 0.0006, "step": 5600 }, { "epoch": 1.8418467583497053, "grad_norm": 0.005558451171964407, "learning_rate": 2.1461491813220134e-05, "loss": 0.0002, "step": 5625 }, { "epoch": 1.8500327439423707, "grad_norm": 0.02043689414858818, "learning_rate": 2.130988477865373e-05, "loss": 0.044, "step": 5650 }, { "epoch": 1.8582187295350359, "grad_norm": 0.007403884083032608, "learning_rate": 2.1158277744087325e-05, "loss": 0.0295, "step": 5675 }, { "epoch": 1.8664047151277012, "grad_norm": 0.002108983462676406, "learning_rate": 2.1006670709520924e-05, "loss": 0.0033, "step": 5700 }, { "epoch": 1.8745907007203666, "grad_norm": 0.0540454126894474, "learning_rate": 2.085506367495452e-05, "loss": 0.0247, "step": 5725 }, { "epoch": 1.882776686313032, "grad_norm": 0.02874263934791088, "learning_rate": 2.0703456640388115e-05, "loss": 0.0117, "step": 5750 }, { "epoch": 1.8909626719056973, "grad_norm": 0.0010341499000787735, "learning_rate": 2.055184960582171e-05, "loss": 0.0029, "step": 5775 }, { "epoch": 1.8991486574983627, "grad_norm": 0.0022526606917381287, "learning_rate": 2.0400242571255306e-05, "loss": 0.0404, "step": 5800 }, { "epoch": 1.907334643091028, "grad_norm": 0.0009332990157417953, "learning_rate": 2.0248635536688905e-05, "loss": 0.0001, "step": 5825 }, { "epoch": 1.9155206286836934, "grad_norm": 0.01605447567999363, "learning_rate": 2.00970285021225e-05, "loss": 0.0744, "step": 5850 }, { "epoch": 1.9237066142763588, "grad_norm": 0.057828161865472794, "learning_rate": 1.9945421467556096e-05, "loss": 0.038, "step": 5875 }, { "epoch": 1.9318925998690242, "grad_norm": 0.01713876612484455, "learning_rate": 1.9793814432989692e-05, "loss": 0.0071, "step": 5900 }, { "epoch": 1.9400785854616895, "grad_norm": 0.0016962456284090877, "learning_rate": 1.9642207398423287e-05, "loss": 0.0075, "step": 5925 }, { "epoch": 1.948264571054355, "grad_norm": 0.001768580754287541, "learning_rate": 1.9490600363856883e-05, "loss": 0.0008, "step": 5950 }, { "epoch": 1.9564505566470203, "grad_norm": 0.005728750489652157, "learning_rate": 1.9338993329290482e-05, "loss": 0.0627, "step": 5975 }, { "epoch": 1.9646365422396856, "grad_norm": 7.606292724609375, "learning_rate": 1.9187386294724078e-05, "loss": 0.0418, "step": 6000 }, { "epoch": 1.972822527832351, "grad_norm": 0.0018708339193835855, "learning_rate": 1.903577926015767e-05, "loss": 0.0006, "step": 6025 }, { "epoch": 1.9810085134250164, "grad_norm": 7.210286617279053, "learning_rate": 1.888417222559127e-05, "loss": 0.0324, "step": 6050 }, { "epoch": 1.9891944990176817, "grad_norm": 0.021973388269543648, "learning_rate": 1.8732565191024864e-05, "loss": 0.0005, "step": 6075 }, { "epoch": 1.9973804846103471, "grad_norm": 0.2803119421005249, "learning_rate": 1.858095815645846e-05, "loss": 0.0454, "step": 6100 }, { "epoch": 2.0, "eval_accuracy": 0.9975442043222004, "eval_auc": 0.9995912800437162, "eval_f1": 0.9044585987261147, "eval_loss": 0.008845364674925804, "eval_precision": 0.8554216867469879, "eval_recall": 0.9594594594594594, "eval_runtime": 74.792, "eval_samples_per_second": 81.666, "eval_steps_per_second": 5.107, "step": 6108 }, { "epoch": 2.0055664702030125, "grad_norm": 0.022678203880786896, "learning_rate": 1.8435415403274715e-05, "loss": 0.0121, "step": 6125 }, { "epoch": 2.013752455795678, "grad_norm": 0.0033217219170182943, "learning_rate": 1.8283808368708307e-05, "loss": 0.0446, "step": 6150 }, { "epoch": 2.021938441388343, "grad_norm": 0.006960027851164341, "learning_rate": 1.8132201334141906e-05, "loss": 0.0067, "step": 6175 }, { "epoch": 2.0301244269810086, "grad_norm": 1.543547511100769, "learning_rate": 1.79805942995755e-05, "loss": 0.0201, "step": 6200 }, { "epoch": 2.038310412573674, "grad_norm": 0.01694468781352043, "learning_rate": 1.7828987265009097e-05, "loss": 0.0258, "step": 6225 }, { "epoch": 2.0464963981663393, "grad_norm": 0.004578434396535158, "learning_rate": 1.7677380230442696e-05, "loss": 0.0006, "step": 6250 }, { "epoch": 2.0546823837590047, "grad_norm": 1.5190191268920898, "learning_rate": 1.7525773195876288e-05, "loss": 0.0126, "step": 6275 }, { "epoch": 2.06286836935167, "grad_norm": 0.0018938812427222729, "learning_rate": 1.7374166161309884e-05, "loss": 0.0003, "step": 6300 }, { "epoch": 2.0710543549443354, "grad_norm": 0.0011117063695564866, "learning_rate": 1.7222559126743483e-05, "loss": 0.0002, "step": 6325 }, { "epoch": 2.079240340537001, "grad_norm": 0.000687290565110743, "learning_rate": 1.7070952092177078e-05, "loss": 0.0154, "step": 6350 }, { "epoch": 2.087426326129666, "grad_norm": 0.001672151731327176, "learning_rate": 1.6919345057610674e-05, "loss": 0.0006, "step": 6375 }, { "epoch": 2.0956123117223315, "grad_norm": 5.109748363494873, "learning_rate": 1.676773802304427e-05, "loss": 0.0352, "step": 6400 }, { "epoch": 2.103798297314997, "grad_norm": 0.020103197544813156, "learning_rate": 1.6616130988477865e-05, "loss": 0.0007, "step": 6425 }, { "epoch": 2.1119842829076623, "grad_norm": 0.005654872860759497, "learning_rate": 1.646452395391146e-05, "loss": 0.0004, "step": 6450 }, { "epoch": 2.1201702685003276, "grad_norm": 0.009023274295032024, "learning_rate": 1.631291691934506e-05, "loss": 0.0018, "step": 6475 }, { "epoch": 2.128356254092993, "grad_norm": 0.0016646806616336107, "learning_rate": 1.6161309884778655e-05, "loss": 0.0001, "step": 6500 }, { "epoch": 2.1365422396856584, "grad_norm": 0.0016474899603053927, "learning_rate": 1.600970285021225e-05, "loss": 0.0005, "step": 6525 }, { "epoch": 2.1447282252783237, "grad_norm": 0.014943123795092106, "learning_rate": 1.5858095815645846e-05, "loss": 0.0147, "step": 6550 }, { "epoch": 2.1529142108709887, "grad_norm": 0.0021474878303706646, "learning_rate": 1.570648878107944e-05, "loss": 0.0002, "step": 6575 }, { "epoch": 2.161100196463654, "grad_norm": 0.0017727293306961656, "learning_rate": 1.555488174651304e-05, "loss": 0.0, "step": 6600 }, { "epoch": 2.1692861820563194, "grad_norm": 0.0010321360314264894, "learning_rate": 1.5403274711946636e-05, "loss": 0.0001, "step": 6625 }, { "epoch": 2.1774721676489848, "grad_norm": 0.0004758843861054629, "learning_rate": 1.5251667677380232e-05, "loss": 0.0114, "step": 6650 }, { "epoch": 2.18565815324165, "grad_norm": 0.0012892503291368484, "learning_rate": 1.5100060642813826e-05, "loss": 0.0262, "step": 6675 }, { "epoch": 2.1938441388343155, "grad_norm": 0.01857004500925541, "learning_rate": 1.4948453608247423e-05, "loss": 0.0003, "step": 6700 }, { "epoch": 2.202030124426981, "grad_norm": 0.0008667947258800268, "learning_rate": 1.479684657368102e-05, "loss": 0.0099, "step": 6725 }, { "epoch": 2.2102161100196462, "grad_norm": 0.001100408611819148, "learning_rate": 1.4645239539114616e-05, "loss": 0.0341, "step": 6750 }, { "epoch": 2.2184020956123116, "grad_norm": 0.001291294815018773, "learning_rate": 1.4493632504548213e-05, "loss": 0.0001, "step": 6775 }, { "epoch": 2.226588081204977, "grad_norm": 0.011078906245529652, "learning_rate": 1.4342025469981807e-05, "loss": 0.0479, "step": 6800 }, { "epoch": 2.2347740667976423, "grad_norm": 0.0018653717124834657, "learning_rate": 1.4190418435415404e-05, "loss": 0.0026, "step": 6825 }, { "epoch": 2.2429600523903077, "grad_norm": 0.0010668339673429728, "learning_rate": 1.4038811400849e-05, "loss": 0.0005, "step": 6850 }, { "epoch": 2.251146037982973, "grad_norm": 0.002098933095112443, "learning_rate": 1.3887204366282597e-05, "loss": 0.0005, "step": 6875 }, { "epoch": 2.2593320235756384, "grad_norm": 0.0008214248227886856, "learning_rate": 1.3735597331716193e-05, "loss": 0.0185, "step": 6900 }, { "epoch": 2.267518009168304, "grad_norm": 0.001296183792874217, "learning_rate": 1.3583990297149786e-05, "loss": 0.0354, "step": 6925 }, { "epoch": 2.275703994760969, "grad_norm": 0.0009354737703688443, "learning_rate": 1.3432383262583384e-05, "loss": 0.0127, "step": 6950 }, { "epoch": 2.2838899803536346, "grad_norm": 0.0020546901505440474, "learning_rate": 1.3280776228016981e-05, "loss": 0.0004, "step": 6975 }, { "epoch": 2.2920759659463, "grad_norm": 0.02266145683825016, "learning_rate": 1.3129169193450577e-05, "loss": 0.0364, "step": 7000 }, { "epoch": 2.3002619515389653, "grad_norm": 0.010976303368806839, "learning_rate": 1.2977562158884174e-05, "loss": 0.0018, "step": 7025 }, { "epoch": 2.3084479371316307, "grad_norm": 0.006094989832490683, "learning_rate": 1.2825955124317768e-05, "loss": 0.0091, "step": 7050 }, { "epoch": 2.316633922724296, "grad_norm": 0.07841998338699341, "learning_rate": 1.2674348089751365e-05, "loss": 0.0094, "step": 7075 }, { "epoch": 2.3248199083169614, "grad_norm": 2.630479335784912, "learning_rate": 1.252274105518496e-05, "loss": 0.0029, "step": 7100 }, { "epoch": 2.3330058939096268, "grad_norm": 0.0039812372997403145, "learning_rate": 1.2371134020618558e-05, "loss": 0.0086, "step": 7125 }, { "epoch": 2.341191879502292, "grad_norm": 0.0007643400458618999, "learning_rate": 1.2219526986052153e-05, "loss": 0.0001, "step": 7150 }, { "epoch": 2.3493778650949575, "grad_norm": 0.0021422533318400383, "learning_rate": 1.2067919951485749e-05, "loss": 0.0029, "step": 7175 }, { "epoch": 2.357563850687623, "grad_norm": 0.0017942421836778522, "learning_rate": 1.1916312916919346e-05, "loss": 0.0026, "step": 7200 }, { "epoch": 2.3657498362802882, "grad_norm": 0.0007688822224736214, "learning_rate": 1.1764705882352942e-05, "loss": 0.0018, "step": 7225 }, { "epoch": 2.3739358218729536, "grad_norm": 0.0004730868386104703, "learning_rate": 1.1613098847786537e-05, "loss": 0.0028, "step": 7250 }, { "epoch": 2.382121807465619, "grad_norm": 0.0004939533537253737, "learning_rate": 1.1461491813220133e-05, "loss": 0.0003, "step": 7275 }, { "epoch": 2.3903077930582843, "grad_norm": 0.0010573838371783495, "learning_rate": 1.130988477865373e-05, "loss": 0.0158, "step": 7300 }, { "epoch": 2.3984937786509497, "grad_norm": 0.0003521484904922545, "learning_rate": 1.1158277744087328e-05, "loss": 0.0008, "step": 7325 }, { "epoch": 2.406679764243615, "grad_norm": 0.00083108467515558, "learning_rate": 1.1006670709520921e-05, "loss": 0.0209, "step": 7350 }, { "epoch": 2.4148657498362804, "grad_norm": 0.00046702896361239254, "learning_rate": 1.0855063674954519e-05, "loss": 0.0002, "step": 7375 }, { "epoch": 2.423051735428946, "grad_norm": 0.0009466594783589244, "learning_rate": 1.0703456640388114e-05, "loss": 0.0534, "step": 7400 }, { "epoch": 2.431237721021611, "grad_norm": 0.0005027965526096523, "learning_rate": 1.055184960582171e-05, "loss": 0.0031, "step": 7425 }, { "epoch": 2.4394237066142765, "grad_norm": 0.00883590430021286, "learning_rate": 1.0400242571255307e-05, "loss": 0.0078, "step": 7450 }, { "epoch": 2.4476096922069415, "grad_norm": 0.0009790252661332488, "learning_rate": 1.0248635536688903e-05, "loss": 0.002, "step": 7475 }, { "epoch": 2.455795677799607, "grad_norm": 0.017697228118777275, "learning_rate": 1.00970285021225e-05, "loss": 0.0049, "step": 7500 }, { "epoch": 2.463981663392272, "grad_norm": 0.0005443913978524506, "learning_rate": 9.945421467556094e-06, "loss": 0.0672, "step": 7525 }, { "epoch": 2.4721676489849376, "grad_norm": 0.0002733923611231148, "learning_rate": 9.793814432989691e-06, "loss": 0.0089, "step": 7550 }, { "epoch": 2.480353634577603, "grad_norm": 0.002381040947511792, "learning_rate": 9.642207398423288e-06, "loss": 0.0001, "step": 7575 }, { "epoch": 2.4885396201702683, "grad_norm": 0.0006111008697189391, "learning_rate": 9.490600363856882e-06, "loss": 0.0, "step": 7600 }, { "epoch": 2.4967256057629337, "grad_norm": 0.0005736036109738052, "learning_rate": 9.33899332929048e-06, "loss": 0.0176, "step": 7625 }, { "epoch": 2.504911591355599, "grad_norm": 0.00394043792039156, "learning_rate": 9.187386294724077e-06, "loss": 0.0039, "step": 7650 }, { "epoch": 2.5130975769482644, "grad_norm": 0.0005367947742342949, "learning_rate": 9.035779260157672e-06, "loss": 0.0001, "step": 7675 }, { "epoch": 2.52128356254093, "grad_norm": 0.004565802868455648, "learning_rate": 8.884172225591268e-06, "loss": 0.0255, "step": 7700 }, { "epoch": 2.529469548133595, "grad_norm": 0.0027460469864308834, "learning_rate": 8.732565191024863e-06, "loss": 0.0007, "step": 7725 }, { "epoch": 2.5376555337262605, "grad_norm": 0.006453353445976973, "learning_rate": 8.58095815645846e-06, "loss": 0.0003, "step": 7750 }, { "epoch": 2.545841519318926, "grad_norm": 0.05628466606140137, "learning_rate": 8.429351121892056e-06, "loss": 0.0005, "step": 7775 }, { "epoch": 2.5540275049115913, "grad_norm": 0.0020795781165361404, "learning_rate": 8.277744087325652e-06, "loss": 0.0001, "step": 7800 }, { "epoch": 2.5622134905042566, "grad_norm": 6.260252952575684, "learning_rate": 8.126137052759249e-06, "loss": 0.065, "step": 7825 }, { "epoch": 2.570399476096922, "grad_norm": 0.032053545117378235, "learning_rate": 7.974530018192845e-06, "loss": 0.0005, "step": 7850 }, { "epoch": 2.5785854616895874, "grad_norm": 0.0007737306877970695, "learning_rate": 7.82292298362644e-06, "loss": 0.0005, "step": 7875 }, { "epoch": 2.5867714472822527, "grad_norm": 0.0028309288900345564, "learning_rate": 7.671315949060038e-06, "loss": 0.0004, "step": 7900 }, { "epoch": 2.594957432874918, "grad_norm": 0.002330320654436946, "learning_rate": 7.519708914493632e-06, "loss": 0.0001, "step": 7925 }, { "epoch": 2.6031434184675835, "grad_norm": 0.002036983147263527, "learning_rate": 7.3681018799272296e-06, "loss": 0.0011, "step": 7950 }, { "epoch": 2.611329404060249, "grad_norm": 0.0005035591893829405, "learning_rate": 7.216494845360824e-06, "loss": 0.0012, "step": 7975 }, { "epoch": 2.619515389652914, "grad_norm": 0.011760620400309563, "learning_rate": 7.064887810794421e-06, "loss": 0.0015, "step": 8000 }, { "epoch": 2.6277013752455796, "grad_norm": 0.07406982779502869, "learning_rate": 6.913280776228018e-06, "loss": 0.0001, "step": 8025 }, { "epoch": 2.635887360838245, "grad_norm": 0.002441051648929715, "learning_rate": 6.761673741661613e-06, "loss": 0.0008, "step": 8050 }, { "epoch": 2.6440733464309103, "grad_norm": 0.001771932584233582, "learning_rate": 6.61006670709521e-06, "loss": 0.0261, "step": 8075 }, { "epoch": 2.6522593320235757, "grad_norm": 0.003007644321769476, "learning_rate": 6.458459672528806e-06, "loss": 0.0027, "step": 8100 }, { "epoch": 2.660445317616241, "grad_norm": 0.0029620120767503977, "learning_rate": 6.306852637962402e-06, "loss": 0.0001, "step": 8125 }, { "epoch": 2.6686313032089064, "grad_norm": 0.002668931847438216, "learning_rate": 6.161309884778654e-06, "loss": 0.0218, "step": 8150 }, { "epoch": 2.6768172888015718, "grad_norm": 0.005034355912357569, "learning_rate": 6.0097028502122506e-06, "loss": 0.0002, "step": 8175 }, { "epoch": 2.685003274394237, "grad_norm": 0.2363610863685608, "learning_rate": 5.858095815645846e-06, "loss": 0.0237, "step": 8200 }, { "epoch": 2.6931892599869025, "grad_norm": 0.0029050330631434917, "learning_rate": 5.7064887810794426e-06, "loss": 0.0142, "step": 8225 }, { "epoch": 2.701375245579568, "grad_norm": 0.0002962597936857492, "learning_rate": 5.554881746513038e-06, "loss": 0.0008, "step": 8250 }, { "epoch": 2.7095612311722332, "grad_norm": 0.002445927122607827, "learning_rate": 5.4032747119466346e-06, "loss": 0.0395, "step": 8275 }, { "epoch": 2.7177472167648986, "grad_norm": 4.508545875549316, "learning_rate": 5.251667677380231e-06, "loss": 0.0135, "step": 8300 }, { "epoch": 2.725933202357564, "grad_norm": 0.001652977429330349, "learning_rate": 5.1000606428138266e-06, "loss": 0.0005, "step": 8325 }, { "epoch": 2.7341191879502293, "grad_norm": 0.007348712533712387, "learning_rate": 4.948453608247423e-06, "loss": 0.0001, "step": 8350 }, { "epoch": 2.7423051735428947, "grad_norm": 0.0047335317358374596, "learning_rate": 4.796846573681019e-06, "loss": 0.0002, "step": 8375 }, { "epoch": 2.75049115913556, "grad_norm": 0.0006250338046811521, "learning_rate": 4.645239539114615e-06, "loss": 0.002, "step": 8400 }, { "epoch": 2.7586771447282254, "grad_norm": 0.0020159140694886446, "learning_rate": 4.493632504548211e-06, "loss": 0.0001, "step": 8425 }, { "epoch": 2.766863130320891, "grad_norm": 0.005961321294307709, "learning_rate": 4.342025469981807e-06, "loss": 0.0825, "step": 8450 }, { "epoch": 2.775049115913556, "grad_norm": 0.004888585302978754, "learning_rate": 4.190418435415403e-06, "loss": 0.0003, "step": 8475 }, { "epoch": 2.7832351015062216, "grad_norm": 0.0033836057409644127, "learning_rate": 4.038811400849e-06, "loss": 0.0002, "step": 8500 }, { "epoch": 2.791421087098887, "grad_norm": 0.048891954123973846, "learning_rate": 3.887204366282595e-06, "loss": 0.0002, "step": 8525 }, { "epoch": 2.7996070726915523, "grad_norm": 0.0019098659977316856, "learning_rate": 3.735597331716192e-06, "loss": 0.0014, "step": 8550 }, { "epoch": 2.8077930582842177, "grad_norm": 0.005250090733170509, "learning_rate": 3.583990297149788e-06, "loss": 0.0019, "step": 8575 }, { "epoch": 2.815979043876883, "grad_norm": 0.0005940294940955937, "learning_rate": 3.432383262583384e-06, "loss": 0.0003, "step": 8600 }, { "epoch": 2.8241650294695484, "grad_norm": 0.00769090885296464, "learning_rate": 3.28077622801698e-06, "loss": 0.0003, "step": 8625 }, { "epoch": 2.8323510150622138, "grad_norm": 0.004769071005284786, "learning_rate": 3.129169193450576e-06, "loss": 0.0082, "step": 8650 }, { "epoch": 2.8405370006548787, "grad_norm": 0.0006792404456064105, "learning_rate": 2.983626440266829e-06, "loss": 0.071, "step": 8675 }, { "epoch": 2.848722986247544, "grad_norm": 0.0006690117879770696, "learning_rate": 2.832019405700425e-06, "loss": 0.0303, "step": 8700 }, { "epoch": 2.8569089718402094, "grad_norm": 0.003535321680828929, "learning_rate": 2.6804123711340204e-06, "loss": 0.0041, "step": 8725 }, { "epoch": 2.865094957432875, "grad_norm": 0.007222822401672602, "learning_rate": 2.528805336567617e-06, "loss": 0.0002, "step": 8750 }, { "epoch": 2.87328094302554, "grad_norm": 0.001671507372520864, "learning_rate": 2.377198302001213e-06, "loss": 0.0102, "step": 8775 }, { "epoch": 2.8814669286182055, "grad_norm": 0.0008472661720588803, "learning_rate": 2.2255912674348092e-06, "loss": 0.0004, "step": 8800 }, { "epoch": 2.889652914210871, "grad_norm": 0.0037155933678150177, "learning_rate": 2.0739842328684052e-06, "loss": 0.0007, "step": 8825 }, { "epoch": 2.8978388998035363, "grad_norm": 0.0019001095788553357, "learning_rate": 1.9223771983020012e-06, "loss": 0.0071, "step": 8850 }, { "epoch": 2.9060248853962016, "grad_norm": 0.01914367638528347, "learning_rate": 1.7707701637355974e-06, "loss": 0.0003, "step": 8875 }, { "epoch": 2.914210870988867, "grad_norm": 0.0017183177405968308, "learning_rate": 1.6191631291691936e-06, "loss": 0.0001, "step": 8900 }, { "epoch": 2.9223968565815324, "grad_norm": 0.000283712986856699, "learning_rate": 1.4675560946027896e-06, "loss": 0.007, "step": 8925 }, { "epoch": 2.9305828421741977, "grad_norm": 0.0013638654490932822, "learning_rate": 1.3159490600363858e-06, "loss": 0.031, "step": 8950 }, { "epoch": 2.938768827766863, "grad_norm": 0.0024313374888151884, "learning_rate": 1.1643420254699818e-06, "loss": 0.0001, "step": 8975 }, { "epoch": 2.9469548133595285, "grad_norm": 0.0011194221442565322, "learning_rate": 1.012734990903578e-06, "loss": 0.0001, "step": 9000 }, { "epoch": 2.955140798952194, "grad_norm": 0.0018601082265377045, "learning_rate": 8.611279563371741e-07, "loss": 0.0091, "step": 9025 }, { "epoch": 2.963326784544859, "grad_norm": 0.001517856726422906, "learning_rate": 7.095209217707701e-07, "loss": 0.0003, "step": 9050 }, { "epoch": 2.9715127701375246, "grad_norm": 0.00028195424238219857, "learning_rate": 5.579138872043664e-07, "loss": 0.0225, "step": 9075 }, { "epoch": 2.97969875573019, "grad_norm": 0.0005502802086994052, "learning_rate": 4.0630685263796246e-07, "loss": 0.0001, "step": 9100 }, { "epoch": 2.9878847413228553, "grad_norm": 0.0016429908573627472, "learning_rate": 2.5469981807155856e-07, "loss": 0.0003, "step": 9125 }, { "epoch": 2.9960707269155207, "grad_norm": 0.0061945500783622265, "learning_rate": 1.0309278350515465e-07, "loss": 0.0033, "step": 9150 }, { "epoch": 3.0, "eval_accuracy": 0.9988539620170268, "eval_auc": 0.9999216153508497, "eval_f1": 0.9523809523809523, "eval_loss": 0.0027391575276851654, "eval_precision": 0.958904109589041, "eval_recall": 0.9459459459459459, "eval_runtime": 74.4951, "eval_samples_per_second": 81.992, "eval_steps_per_second": 5.128, "step": 9162 } ], "logging_steps": 25, "max_steps": 9162, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.944931470697103e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }