{ "best_metric": NaN, "best_model_checkpoint": "autotrain-vkk0q-3g7ic/checkpoint-4732", "epoch": 1.0, "eval_steps": 500, "global_step": 4732, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005283178360101437, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 25 }, { "epoch": 0.010566356720202874, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 50 }, { "epoch": 0.01584953508030431, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 75 }, { "epoch": 0.021132713440405747, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 100 }, { "epoch": 0.026415891800507185, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 125 }, { "epoch": 0.03169907016060862, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 150 }, { "epoch": 0.03698224852071006, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 175 }, { "epoch": 0.042265426880811495, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 200 }, { "epoch": 0.047548605240912936, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 225 }, { "epoch": 0.05283178360101437, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 250 }, { "epoch": 0.058114961961115805, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 275 }, { "epoch": 0.06339814032121724, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 300 }, { "epoch": 0.06868131868131869, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 325 }, { "epoch": 0.07396449704142012, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 350 }, { "epoch": 0.07924767540152156, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 375 }, { "epoch": 0.08453085376162299, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 400 }, { "epoch": 0.08981403212172442, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 425 }, { "epoch": 0.09509721048182587, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 450 }, { "epoch": 0.1003803888419273, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 475 }, { "epoch": 0.10566356720202874, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 500 }, { "epoch": 0.11094674556213018, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 525 }, { "epoch": 0.11622992392223161, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 550 }, { "epoch": 0.12151310228233306, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 575 }, { "epoch": 0.12679628064243448, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 600 }, { "epoch": 0.13207945900253593, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 625 }, { "epoch": 0.13736263736263737, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 650 }, { "epoch": 0.1426458157227388, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 675 }, { "epoch": 0.14792899408284024, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 700 }, { "epoch": 0.15321217244294166, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 725 }, { "epoch": 0.1584953508030431, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 750 }, { "epoch": 0.16377852916314456, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 775 }, { "epoch": 0.16906170752324598, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 800 }, { "epoch": 0.17434488588334743, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 825 }, { "epoch": 0.17962806424344885, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 850 }, { "epoch": 0.1849112426035503, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 875 }, { "epoch": 0.19019442096365174, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 900 }, { "epoch": 0.19547759932375317, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 925 }, { "epoch": 0.2007607776838546, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 950 }, { "epoch": 0.20604395604395603, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 975 }, { "epoch": 0.21132713440405748, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1000 }, { "epoch": 0.21661031276415893, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1025 }, { "epoch": 0.22189349112426035, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1050 }, { "epoch": 0.2271766694843618, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1075 }, { "epoch": 0.23245984784446322, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1100 }, { "epoch": 0.23774302620456467, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1125 }, { "epoch": 0.24302620456466612, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1150 }, { "epoch": 0.24830938292476754, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1175 }, { "epoch": 0.25359256128486896, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1200 }, { "epoch": 0.2588757396449704, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1225 }, { "epoch": 0.26415891800507185, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1250 }, { "epoch": 0.2694420963651733, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1275 }, { "epoch": 0.27472527472527475, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1300 }, { "epoch": 0.28000845308537614, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1325 }, { "epoch": 0.2852916314454776, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1350 }, { "epoch": 0.29057480980557904, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1375 }, { "epoch": 0.2958579881656805, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1400 }, { "epoch": 0.30114116652578193, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1425 }, { "epoch": 0.3064243448858833, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1450 }, { "epoch": 0.3117075232459848, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1475 }, { "epoch": 0.3169907016060862, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1500 }, { "epoch": 0.32227387996618767, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1525 }, { "epoch": 0.3275570583262891, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1550 }, { "epoch": 0.3328402366863905, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1575 }, { "epoch": 0.33812341504649196, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1600 }, { "epoch": 0.3434065934065934, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1625 }, { "epoch": 0.34868977176669486, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1650 }, { "epoch": 0.3539729501267963, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1675 }, { "epoch": 0.3592561284868977, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1700 }, { "epoch": 0.36453930684699914, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1725 }, { "epoch": 0.3698224852071006, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1750 }, { "epoch": 0.37510566356720204, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1775 }, { "epoch": 0.3803888419273035, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1800 }, { "epoch": 0.3856720202874049, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1825 }, { "epoch": 0.39095519864750633, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1850 }, { "epoch": 0.3962383770076078, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1875 }, { "epoch": 0.4015215553677092, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1900 }, { "epoch": 0.4068047337278107, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1925 }, { "epoch": 0.41208791208791207, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1950 }, { "epoch": 0.4173710904480135, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1975 }, { "epoch": 0.42265426880811496, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2000 }, { "epoch": 0.4279374471682164, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2025 }, { "epoch": 0.43322062552831786, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2050 }, { "epoch": 0.43850380388841925, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2075 }, { "epoch": 0.4437869822485207, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2100 }, { "epoch": 0.44907016060862215, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2125 }, { "epoch": 0.4543533389687236, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2150 }, { "epoch": 0.45963651732882504, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2175 }, { "epoch": 0.46491969568892644, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2200 }, { "epoch": 0.4702028740490279, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2225 }, { "epoch": 0.47548605240912933, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2250 }, { "epoch": 0.4807692307692308, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2275 }, { "epoch": 0.48605240912933223, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2300 }, { "epoch": 0.4913355874894336, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2325 }, { "epoch": 0.49661876584953507, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2350 }, { "epoch": 0.5019019442096365, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2375 }, { "epoch": 0.5071851225697379, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2400 }, { "epoch": 0.5124683009298394, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2425 }, { "epoch": 0.5177514792899408, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2450 }, { "epoch": 0.5230346576500423, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2475 }, { "epoch": 0.5283178360101437, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2500 }, { "epoch": 0.5336010143702451, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2525 }, { "epoch": 0.5388841927303466, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2550 }, { "epoch": 0.544167371090448, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2575 }, { "epoch": 0.5494505494505495, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2600 }, { "epoch": 0.5547337278106509, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2625 }, { "epoch": 0.5600169061707523, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2650 }, { "epoch": 0.5653000845308538, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2675 }, { "epoch": 0.5705832628909552, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2700 }, { "epoch": 0.5758664412510567, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2725 }, { "epoch": 0.5811496196111581, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2750 }, { "epoch": 0.5864327979712595, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2775 }, { "epoch": 0.591715976331361, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2800 }, { "epoch": 0.5969991546914624, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2825 }, { "epoch": 0.6022823330515639, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2850 }, { "epoch": 0.6075655114116653, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2875 }, { "epoch": 0.6128486897717667, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2900 }, { "epoch": 0.6181318681318682, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2925 }, { "epoch": 0.6234150464919695, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2950 }, { "epoch": 0.628698224852071, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2975 }, { "epoch": 0.6339814032121724, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3000 }, { "epoch": 0.6392645815722738, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3025 }, { "epoch": 0.6445477599323753, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3050 }, { "epoch": 0.6498309382924767, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3075 }, { "epoch": 0.6551141166525782, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3100 }, { "epoch": 0.6603972950126796, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3125 }, { "epoch": 0.665680473372781, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3150 }, { "epoch": 0.6709636517328825, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3175 }, { "epoch": 0.6762468300929839, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3200 }, { "epoch": 0.6815300084530854, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3225 }, { "epoch": 0.6868131868131868, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3250 }, { "epoch": 0.6920963651732882, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3275 }, { "epoch": 0.6973795435333897, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3300 }, { "epoch": 0.7026627218934911, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3325 }, { "epoch": 0.7079459002535926, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3350 }, { "epoch": 0.713229078613694, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3375 }, { "epoch": 0.7185122569737954, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3400 }, { "epoch": 0.7237954353338969, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3425 }, { "epoch": 0.7290786136939983, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3450 }, { "epoch": 0.7343617920540998, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3475 }, { "epoch": 0.7396449704142012, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3500 }, { "epoch": 0.7449281487743026, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3525 }, { "epoch": 0.7502113271344041, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3550 }, { "epoch": 0.7554945054945055, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3575 }, { "epoch": 0.760777683854607, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3600 }, { "epoch": 0.7660608622147084, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3625 }, { "epoch": 0.7713440405748098, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3650 }, { "epoch": 0.7766272189349113, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3675 }, { "epoch": 0.7819103972950127, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3700 }, { "epoch": 0.7871935756551142, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3725 }, { "epoch": 0.7924767540152156, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3750 }, { "epoch": 0.797759932375317, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3775 }, { "epoch": 0.8030431107354185, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3800 }, { "epoch": 0.8083262890955198, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3825 }, { "epoch": 0.8136094674556213, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3850 }, { "epoch": 0.8188926458157227, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3875 }, { "epoch": 0.8241758241758241, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3900 }, { "epoch": 0.8294590025359256, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3925 }, { "epoch": 0.834742180896027, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3950 }, { "epoch": 0.8400253592561285, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3975 }, { "epoch": 0.8453085376162299, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4000 }, { "epoch": 0.8505917159763313, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4025 }, { "epoch": 0.8558748943364328, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4050 }, { "epoch": 0.8611580726965342, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4075 }, { "epoch": 0.8664412510566357, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4100 }, { "epoch": 0.8717244294167371, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4125 }, { "epoch": 0.8770076077768385, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4150 }, { "epoch": 0.88229078613694, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4175 }, { "epoch": 0.8875739644970414, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4200 }, { "epoch": 0.8928571428571429, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4225 }, { "epoch": 0.8981403212172443, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4250 }, { "epoch": 0.9034234995773457, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4275 }, { "epoch": 0.9087066779374472, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4300 }, { "epoch": 0.9139898562975486, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4325 }, { "epoch": 0.9192730346576501, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4350 }, { "epoch": 0.9245562130177515, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4375 }, { "epoch": 0.9298393913778529, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4400 }, { "epoch": 0.9351225697379544, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4425 }, { "epoch": 0.9404057480980558, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4450 }, { "epoch": 0.9456889264581573, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4475 }, { "epoch": 0.9509721048182587, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4500 }, { "epoch": 0.9562552831783601, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4525 }, { "epoch": 0.9615384615384616, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4550 }, { "epoch": 0.966821639898563, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4575 }, { "epoch": 0.9721048182586645, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4600 }, { "epoch": 0.9773879966187659, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4625 }, { "epoch": 0.9826711749788672, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4650 }, { "epoch": 0.9879543533389687, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4675 }, { "epoch": 0.9932375316990701, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4700 }, { "epoch": 0.9985207100591716, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4725 }, { "epoch": 1.0, "eval_gen_len": 13.1445, "eval_loss": NaN, "eval_rouge1": 23.1004, "eval_rouge2": 10.166, "eval_rougeL": 21.5532, "eval_rougeLsum": 21.5697, "eval_runtime": 125.2834, "eval_samples_per_second": 9.443, "eval_steps_per_second": 2.363, "step": 4732 } ], "logging_steps": 25, "max_steps": 14196, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 464048823453696.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }