{ "best_metric": 0.9470943315331984, "best_model_checkpoint": "/shared/3/projects/hiatus/tagged_data/models/roberta-base/binary-finetune-full/results/checkpoint-168317", "epoch": 1.760049774157839, "eval_steps": 9901, "global_step": 217822, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004040110214206643, "grad_norm": 0.24907590448856354, "learning_rate": 1.9989899724464485e-05, "loss": 0.3886, "step": 500 }, { "epoch": 0.008080220428413287, "grad_norm": 0.19640618562698364, "learning_rate": 1.997979944892897e-05, "loss": 0.2835, "step": 1000 }, { "epoch": 0.01212033064261993, "grad_norm": 0.21297627687454224, "learning_rate": 1.9969699173393452e-05, "loss": 0.2123, "step": 1500 }, { "epoch": 0.016160440856826573, "grad_norm": 0.1339714229106903, "learning_rate": 1.9959598897857935e-05, "loss": 0.1631, "step": 2000 }, { "epoch": 0.020200551071033218, "grad_norm": 0.20592452585697174, "learning_rate": 1.994949862232242e-05, "loss": 0.1306, "step": 2500 }, { "epoch": 0.02424066128523986, "grad_norm": 0.1734761744737625, "learning_rate": 1.9939398346786902e-05, "loss": 0.1134, "step": 3000 }, { "epoch": 0.028280771499446505, "grad_norm": 0.1429387331008911, "learning_rate": 1.9929298071251385e-05, "loss": 0.0954, "step": 3500 }, { "epoch": 0.032320881713653146, "grad_norm": 0.13530635833740234, "learning_rate": 1.991919779571587e-05, "loss": 0.0833, "step": 4000 }, { "epoch": 0.03636099192785979, "grad_norm": 0.16425204277038574, "learning_rate": 1.9909097520180352e-05, "loss": 0.0739, "step": 4500 }, { "epoch": 0.040401102142066436, "grad_norm": 0.15035004913806915, "learning_rate": 1.9898997244644835e-05, "loss": 0.0657, "step": 5000 }, { "epoch": 0.04444121235627308, "grad_norm": 0.1048048660159111, "learning_rate": 1.988889696910932e-05, "loss": 0.0606, "step": 5500 }, { "epoch": 0.04848132257047972, "grad_norm": 0.1167823076248169, "learning_rate": 1.9878796693573802e-05, "loss": 0.0555, "step": 6000 }, { "epoch": 0.052521432784686364, "grad_norm": 0.17904673516750336, "learning_rate": 1.9868696418038285e-05, "loss": 0.0517, "step": 6500 }, { "epoch": 0.05656154299889301, "grad_norm": 0.22022856771945953, "learning_rate": 1.985859614250277e-05, "loss": 0.0483, "step": 7000 }, { "epoch": 0.060601653213099654, "grad_norm": 0.11566773056983948, "learning_rate": 1.9848495866967252e-05, "loss": 0.0445, "step": 7500 }, { "epoch": 0.06464176342730629, "grad_norm": 0.09606140851974487, "learning_rate": 1.9838395591431736e-05, "loss": 0.0434, "step": 8000 }, { "epoch": 0.06868187364151294, "grad_norm": 0.14014209806919098, "learning_rate": 1.982829531589622e-05, "loss": 0.0415, "step": 8500 }, { "epoch": 0.07272198385571958, "grad_norm": 0.1238802894949913, "learning_rate": 1.9818195040360702e-05, "loss": 0.0401, "step": 9000 }, { "epoch": 0.07676209406992623, "grad_norm": 0.27284470200538635, "learning_rate": 1.9808094764825186e-05, "loss": 0.0375, "step": 9500 }, { "epoch": 0.08000226246171996, "eval_f1_macro": 0.8895664094153212, "eval_f1_micro": 0.9635283563255777, "eval_loss": 0.11441826820373535, "eval_precision_macro": 0.924616270515263, "eval_precision_micro": 0.9673416146866455, "eval_recall_macro": 0.8766278018110966, "eval_recall_micro": 0.9597450436296798, "eval_runtime": 13336.1892, "eval_samples_per_second": 2.97, "eval_steps_per_second": 0.012, "step": 9901 }, { "epoch": 0.08080220428413287, "grad_norm": 0.07111234217882156, "learning_rate": 1.979799448928967e-05, "loss": 0.037, "step": 10000 }, { 
"epoch": 0.08484231449833951, "grad_norm": 0.11708366125822067, "learning_rate": 1.9787894213754152e-05, "loss": 0.0337, "step": 10500 }, { "epoch": 0.08888242471254616, "grad_norm": 0.12418048083782196, "learning_rate": 1.9777793938218636e-05, "loss": 0.0331, "step": 11000 }, { "epoch": 0.0929225349267528, "grad_norm": 0.09599123895168304, "learning_rate": 1.976769366268312e-05, "loss": 0.032, "step": 11500 }, { "epoch": 0.09696264514095944, "grad_norm": 0.08350682258605957, "learning_rate": 1.9757593387147602e-05, "loss": 0.03, "step": 12000 }, { "epoch": 0.10100275535516609, "grad_norm": 0.1321333944797516, "learning_rate": 1.9747493111612086e-05, "loss": 0.03, "step": 12500 }, { "epoch": 0.10504286556937273, "grad_norm": 0.09571998566389084, "learning_rate": 1.973739283607657e-05, "loss": 0.0281, "step": 13000 }, { "epoch": 0.10908297578357938, "grad_norm": 0.11053865402936935, "learning_rate": 1.9727292560541053e-05, "loss": 0.0277, "step": 13500 }, { "epoch": 0.11312308599778602, "grad_norm": 0.06845366209745407, "learning_rate": 1.9717192285005536e-05, "loss": 0.0274, "step": 14000 }, { "epoch": 0.11716319621199266, "grad_norm": 0.11518129706382751, "learning_rate": 1.970709200947002e-05, "loss": 0.0267, "step": 14500 }, { "epoch": 0.12120330642619931, "grad_norm": 0.09419895708560944, "learning_rate": 1.9696991733934503e-05, "loss": 0.0259, "step": 15000 }, { "epoch": 0.12524341664040595, "grad_norm": 0.11040966212749481, "learning_rate": 1.9686891458398986e-05, "loss": 0.0258, "step": 15500 }, { "epoch": 0.12928352685461258, "grad_norm": 0.08434844762086868, "learning_rate": 1.967679118286347e-05, "loss": 0.0253, "step": 16000 }, { "epoch": 0.13332363706881925, "grad_norm": 0.13895830512046814, "learning_rate": 1.9666690907327953e-05, "loss": 0.0247, "step": 16500 }, { "epoch": 0.1373637472830259, "grad_norm": 0.10540761798620224, "learning_rate": 1.9656590631792436e-05, "loss": 0.0245, "step": 17000 }, { "epoch": 0.14140385749723253, "grad_norm": 0.09767123311758041, "learning_rate": 1.964649035625692e-05, "loss": 0.0245, "step": 17500 }, { "epoch": 0.14544396771143916, "grad_norm": 0.08917172998189926, "learning_rate": 1.9636390080721403e-05, "loss": 0.0251, "step": 18000 }, { "epoch": 0.1494840779256458, "grad_norm": 0.13313362002372742, "learning_rate": 1.9626289805185886e-05, "loss": 0.0252, "step": 18500 }, { "epoch": 0.15352418813985247, "grad_norm": 0.12236423790454865, "learning_rate": 1.961618952965037e-05, "loss": 0.0251, "step": 19000 }, { "epoch": 0.1575642983540591, "grad_norm": 0.09876661747694016, "learning_rate": 1.9606089254114853e-05, "loss": 0.0249, "step": 19500 }, { "epoch": 0.16000452492343992, "eval_f1_macro": 0.9035923074831559, "eval_f1_micro": 0.9606367859628728, "eval_loss": 0.11549682915210724, "eval_precision_macro": 0.9036435627053582, "eval_precision_micro": 0.9530018135782053, "eval_recall_macro": 0.9133421606323336, "eval_recall_micro": 0.9683950814868224, "eval_runtime": 15280.9037, "eval_samples_per_second": 2.592, "eval_steps_per_second": 0.01, "step": 19802 }, { "epoch": 0.16160440856826574, "grad_norm": 0.057935502380132675, "learning_rate": 1.9595988978579336e-05, "loss": 0.0246, "step": 20000 }, { "epoch": 0.16564451878247238, "grad_norm": 0.13269482553005219, "learning_rate": 1.958588870304382e-05, "loss": 0.0227, "step": 20500 }, { "epoch": 0.16968462899667902, "grad_norm": 0.09697619825601578, "learning_rate": 1.9575788427508303e-05, "loss": 0.0229, "step": 21000 }, { "epoch": 0.17372473921088566, "grad_norm": 0.2296031266450882, 
"learning_rate": 1.9565688151972786e-05, "loss": 0.0222, "step": 21500 }, { "epoch": 0.17776484942509233, "grad_norm": 0.07054860144853592, "learning_rate": 1.955558787643727e-05, "loss": 0.0217, "step": 22000 }, { "epoch": 0.18180495963929896, "grad_norm": 0.14594444632530212, "learning_rate": 1.9545487600901753e-05, "loss": 0.0216, "step": 22500 }, { "epoch": 0.1858450698535056, "grad_norm": 0.1951671838760376, "learning_rate": 1.9535387325366237e-05, "loss": 0.0236, "step": 23000 }, { "epoch": 0.18988518006771224, "grad_norm": 0.09014302492141724, "learning_rate": 1.952528704983072e-05, "loss": 0.0241, "step": 23500 }, { "epoch": 0.19392529028191888, "grad_norm": 0.07351396232843399, "learning_rate": 1.9515186774295207e-05, "loss": 0.0246, "step": 24000 }, { "epoch": 0.19796540049612554, "grad_norm": 0.13433478772640228, "learning_rate": 1.950508649875969e-05, "loss": 0.0237, "step": 24500 }, { "epoch": 0.20200551071033218, "grad_norm": 0.09406758099794388, "learning_rate": 1.949498622322417e-05, "loss": 0.0246, "step": 25000 }, { "epoch": 0.20604562092453882, "grad_norm": 0.0723891332745552, "learning_rate": 1.9484885947688653e-05, "loss": 0.0248, "step": 25500 }, { "epoch": 0.21008573113874546, "grad_norm": 0.0635887160897255, "learning_rate": 1.9474785672153137e-05, "loss": 0.0243, "step": 26000 }, { "epoch": 0.2141258413529521, "grad_norm": 0.12226919084787369, "learning_rate": 1.9464685396617624e-05, "loss": 0.0246, "step": 26500 }, { "epoch": 0.21816595156715876, "grad_norm": 0.17104922235012054, "learning_rate": 1.9454585121082107e-05, "loss": 0.0244, "step": 27000 }, { "epoch": 0.2222060617813654, "grad_norm": 0.07366731762886047, "learning_rate": 1.9444484845546587e-05, "loss": 0.0236, "step": 27500 }, { "epoch": 0.22624617199557204, "grad_norm": 0.05332706496119499, "learning_rate": 1.943438457001107e-05, "loss": 0.0191, "step": 28000 }, { "epoch": 0.23028628220977868, "grad_norm": 0.12188129127025604, "learning_rate": 1.9424284294475554e-05, "loss": 0.0184, "step": 28500 }, { "epoch": 0.2343263924239853, "grad_norm": 0.07722073048353195, "learning_rate": 1.941418401894004e-05, "loss": 0.018, "step": 29000 }, { "epoch": 0.23836650263819198, "grad_norm": 0.07803859561681747, "learning_rate": 1.9404083743404524e-05, "loss": 0.0176, "step": 29500 }, { "epoch": 0.24000678738515988, "eval_f1_macro": 0.9189758616039659, "eval_f1_micro": 0.9664452050510047, "eval_loss": 0.10743161290884018, "eval_precision_macro": 0.9454764579982075, "eval_precision_micro": 0.9680139971687907, "eval_recall_macro": 0.9051902291376027, "eval_recall_micro": 0.9648814895676969, "eval_runtime": 13586.8138, "eval_samples_per_second": 2.915, "eval_steps_per_second": 0.011, "step": 29703 }, { "epoch": 0.24240661285239862, "grad_norm": 0.04772321879863739, "learning_rate": 1.9393983467869004e-05, "loss": 0.0177, "step": 30000 }, { "epoch": 0.24644672306660526, "grad_norm": 0.11928682029247284, "learning_rate": 1.9383883192333487e-05, "loss": 0.0229, "step": 30500 }, { "epoch": 0.2504868332808119, "grad_norm": 0.08102133870124817, "learning_rate": 1.937378291679797e-05, "loss": 0.0228, "step": 31000 }, { "epoch": 0.25452694349501853, "grad_norm": 0.073371522128582, "learning_rate": 1.9363682641262457e-05, "loss": 0.0231, "step": 31500 }, { "epoch": 0.25856705370922517, "grad_norm": 0.08793163299560547, "learning_rate": 1.935358236572694e-05, "loss": 0.0229, "step": 32000 }, { "epoch": 0.2626071639234318, "grad_norm": 0.12065927684307098, "learning_rate": 1.934348209019142e-05, "loss": 0.0231, "step": 32500 }, 
{ "epoch": 0.2666472741376385, "grad_norm": 0.10052382946014404, "learning_rate": 1.9333381814655904e-05, "loss": 0.0218, "step": 33000 }, { "epoch": 0.27068738435184514, "grad_norm": 0.08048272132873535, "learning_rate": 1.9323281539120387e-05, "loss": 0.0227, "step": 33500 }, { "epoch": 0.2747274945660518, "grad_norm": 0.05087564140558243, "learning_rate": 1.9313181263584874e-05, "loss": 0.0206, "step": 34000 }, { "epoch": 0.2787676047802584, "grad_norm": 0.13495181500911713, "learning_rate": 1.9303080988049357e-05, "loss": 0.0213, "step": 34500 }, { "epoch": 0.28280771499446505, "grad_norm": 0.104469895362854, "learning_rate": 1.9292980712513837e-05, "loss": 0.0215, "step": 35000 }, { "epoch": 0.2868478252086717, "grad_norm": 0.10830747336149216, "learning_rate": 1.928288043697832e-05, "loss": 0.0232, "step": 35500 }, { "epoch": 0.29088793542287833, "grad_norm": 0.07727912068367004, "learning_rate": 1.9272780161442804e-05, "loss": 0.0239, "step": 36000 }, { "epoch": 0.29492804563708497, "grad_norm": 0.11781858652830124, "learning_rate": 1.926267988590729e-05, "loss": 0.0229, "step": 36500 }, { "epoch": 0.2989681558512916, "grad_norm": 0.09465543925762177, "learning_rate": 1.9252579610371774e-05, "loss": 0.0241, "step": 37000 }, { "epoch": 0.30300826606549824, "grad_norm": 0.08024444431066513, "learning_rate": 1.9242479334836254e-05, "loss": 0.0236, "step": 37500 }, { "epoch": 0.30704837627970494, "grad_norm": 0.06720072776079178, "learning_rate": 1.9232379059300738e-05, "loss": 0.0211, "step": 38000 }, { "epoch": 0.3110884864939116, "grad_norm": 0.13362745940685272, "learning_rate": 1.922227878376522e-05, "loss": 0.0203, "step": 38500 }, { "epoch": 0.3151285967081182, "grad_norm": 0.09114370495080948, "learning_rate": 1.9212178508229708e-05, "loss": 0.0203, "step": 39000 }, { "epoch": 0.31916870692232485, "grad_norm": 0.07381443679332733, "learning_rate": 1.920207823269419e-05, "loss": 0.0206, "step": 39500 }, { "epoch": 0.32000904984687983, "eval_f1_macro": 0.9235874471417507, "eval_f1_micro": 0.9663972411435532, "eval_loss": 0.10308045893907547, "eval_precision_macro": 0.9426979961917242, "eval_precision_micro": 0.9680776634557233, "eval_recall_macro": 0.9108391105037915, "eval_recall_micro": 0.9647226425913261, "eval_runtime": 13873.9388, "eval_samples_per_second": 2.854, "eval_steps_per_second": 0.011, "step": 39604 }, { "epoch": 0.3232088171365315, "grad_norm": 0.07686352729797363, "learning_rate": 1.9191977957158674e-05, "loss": 0.0204, "step": 40000 }, { "epoch": 0.32724892735073813, "grad_norm": 0.15459179878234863, "learning_rate": 1.9181877681623154e-05, "loss": 0.0225, "step": 40500 }, { "epoch": 0.33128903756494477, "grad_norm": 0.11474985629320145, "learning_rate": 1.9171777406087638e-05, "loss": 0.0229, "step": 41000 }, { "epoch": 0.3353291477791514, "grad_norm": 0.09817365556955338, "learning_rate": 1.9161677130552125e-05, "loss": 0.0229, "step": 41500 }, { "epoch": 0.33936925799335804, "grad_norm": 0.07288320362567902, "learning_rate": 1.9151576855016608e-05, "loss": 0.0222, "step": 42000 }, { "epoch": 0.3434093682075647, "grad_norm": 0.13258545100688934, "learning_rate": 1.914147657948109e-05, "loss": 0.0221, "step": 42500 }, { "epoch": 0.3474494784217713, "grad_norm": 0.09609493613243103, "learning_rate": 1.913137630394557e-05, "loss": 0.0224, "step": 43000 }, { "epoch": 0.351489588635978, "grad_norm": 0.0800900012254715, "learning_rate": 1.9121276028410055e-05, "loss": 0.022, "step": 43500 }, { "epoch": 0.35552969885018465, "grad_norm": 0.06640051305294037, 
"learning_rate": 1.911117575287454e-05, "loss": 0.0209, "step": 44000 }, { "epoch": 0.3595698090643913, "grad_norm": 0.13987226784229279, "learning_rate": 1.9101075477339025e-05, "loss": 0.022, "step": 44500 }, { "epoch": 0.3636099192785979, "grad_norm": 0.08626226335763931, "learning_rate": 1.9090975201803508e-05, "loss": 0.0219, "step": 45000 }, { "epoch": 0.36765002949280456, "grad_norm": 0.09093815088272095, "learning_rate": 1.9080874926267988e-05, "loss": 0.0191, "step": 45500 }, { "epoch": 0.3716901397070112, "grad_norm": 0.062450163066387177, "learning_rate": 1.907077465073247e-05, "loss": 0.0192, "step": 46000 }, { "epoch": 0.37573024992121784, "grad_norm": 0.14638446271419525, "learning_rate": 1.9060674375196958e-05, "loss": 0.0186, "step": 46500 }, { "epoch": 0.3797703601354245, "grad_norm": 0.08730041235685349, "learning_rate": 1.905057409966144e-05, "loss": 0.019, "step": 47000 }, { "epoch": 0.3838104703496311, "grad_norm": 0.09185372292995453, "learning_rate": 1.9040473824125925e-05, "loss": 0.0189, "step": 47500 }, { "epoch": 0.38785058056383775, "grad_norm": 0.05995471775531769, "learning_rate": 1.9030373548590405e-05, "loss": 0.0174, "step": 48000 }, { "epoch": 0.39189069077804445, "grad_norm": 0.14513157308101654, "learning_rate": 1.902027327305489e-05, "loss": 0.0167, "step": 48500 }, { "epoch": 0.3959308009922511, "grad_norm": 0.08400790393352509, "learning_rate": 1.9010172997519375e-05, "loss": 0.0165, "step": 49000 }, { "epoch": 0.3999709112064577, "grad_norm": 0.0705028846859932, "learning_rate": 1.900007272198386e-05, "loss": 0.0165, "step": 49500 }, { "epoch": 0.40001131230859976, "eval_f1_macro": 0.9281014680466918, "eval_f1_micro": 0.9684054719516875, "eval_loss": 0.10883225500583649, "eval_precision_macro": 0.9609447042869569, "eval_precision_micro": 0.97977646274136, "eval_recall_macro": 0.9023603199352568, "eval_recall_micro": 0.9572953897303177, "eval_runtime": 14645.8089, "eval_samples_per_second": 2.704, "eval_steps_per_second": 0.011, "step": 49505 }, { "epoch": 0.40401102142066436, "grad_norm": 0.06206486374139786, "learning_rate": 1.8989972446448342e-05, "loss": 0.0163, "step": 50000 }, { "epoch": 0.408051131634871, "grad_norm": 0.13632065057754517, "learning_rate": 1.8979872170912822e-05, "loss": 0.0175, "step": 50500 }, { "epoch": 0.41209124184907764, "grad_norm": 0.10581111907958984, "learning_rate": 1.896977189537731e-05, "loss": 0.0179, "step": 51000 }, { "epoch": 0.4161313520632843, "grad_norm": 0.05609723553061485, "learning_rate": 1.8959671619841792e-05, "loss": 0.0171, "step": 51500 }, { "epoch": 0.4201714622774909, "grad_norm": 0.0569671131670475, "learning_rate": 1.8949571344306275e-05, "loss": 0.0177, "step": 52000 }, { "epoch": 0.42421157249169755, "grad_norm": 0.12548725306987762, "learning_rate": 1.893947106877076e-05, "loss": 0.0179, "step": 52500 }, { "epoch": 0.4282516827059042, "grad_norm": 0.14123043417930603, "learning_rate": 1.892937079323524e-05, "loss": 0.0195, "step": 53000 }, { "epoch": 0.4322917929201109, "grad_norm": 0.07868105173110962, "learning_rate": 1.8919270517699725e-05, "loss": 0.0195, "step": 53500 }, { "epoch": 0.4363319031343175, "grad_norm": 0.0551162026822567, "learning_rate": 1.890917024216421e-05, "loss": 0.0194, "step": 54000 }, { "epoch": 0.44037201334852416, "grad_norm": 0.12377525120973587, "learning_rate": 1.8899069966628692e-05, "loss": 0.0193, "step": 54500 }, { "epoch": 0.4444121235627308, "grad_norm": 0.07947281748056412, "learning_rate": 1.8888969691093175e-05, "loss": 0.0195, "step": 55000 }, { 
"epoch": 0.44845223377693744, "grad_norm": 0.07180605828762054, "learning_rate": 1.887886941555766e-05, "loss": 0.0186, "step": 55500 }, { "epoch": 0.4524923439911441, "grad_norm": 0.0590415820479393, "learning_rate": 1.8868769140022142e-05, "loss": 0.0186, "step": 56000 }, { "epoch": 0.4565324542053507, "grad_norm": 0.12405771017074585, "learning_rate": 1.8858668864486626e-05, "loss": 0.0181, "step": 56500 }, { "epoch": 0.46057256441955735, "grad_norm": 0.09074413031339645, "learning_rate": 1.884856858895111e-05, "loss": 0.0178, "step": 57000 }, { "epoch": 0.464612674633764, "grad_norm": 0.12590628862380981, "learning_rate": 1.8838468313415592e-05, "loss": 0.0178, "step": 57500 }, { "epoch": 0.4686527848479706, "grad_norm": 0.047191109508275986, "learning_rate": 1.8828368037880076e-05, "loss": 0.0154, "step": 58000 }, { "epoch": 0.4726928950621773, "grad_norm": 0.13741852343082428, "learning_rate": 1.881826776234456e-05, "loss": 0.0156, "step": 58500 }, { "epoch": 0.47673300527638396, "grad_norm": 0.07473180443048477, "learning_rate": 1.8808167486809042e-05, "loss": 0.0152, "step": 59000 }, { "epoch": 0.48001357477031975, "eval_f1_macro": 0.9238212336154783, "eval_f1_micro": 0.9658474370811376, "eval_loss": 0.10412032902240753, "eval_precision_macro": 0.9360090910956397, "eval_precision_micro": 0.9629420110715917, "eval_recall_macro": 0.9167736650129396, "eval_recall_micro": 0.9687704488794511, "eval_runtime": 13864.7476, "eval_samples_per_second": 2.856, "eval_steps_per_second": 0.011, "step": 59406 }, { "epoch": 0.4807731154905906, "grad_norm": 0.06410785764455795, "learning_rate": 1.8798067211273526e-05, "loss": 0.0147, "step": 59500 }, { "epoch": 0.48481322570479723, "grad_norm": 0.05010313540697098, "learning_rate": 1.878796693573801e-05, "loss": 0.0156, "step": 60000 }, { "epoch": 0.4888533359190039, "grad_norm": 0.14338257908821106, "learning_rate": 1.8777866660202493e-05, "loss": 0.0164, "step": 60500 }, { "epoch": 0.4928934461332105, "grad_norm": 0.09123385697603226, "learning_rate": 1.8767766384666976e-05, "loss": 0.0174, "step": 61000 }, { "epoch": 0.49693355634741715, "grad_norm": 0.07728511840105057, "learning_rate": 1.875766610913146e-05, "loss": 0.017, "step": 61500 }, { "epoch": 0.5009736665616238, "grad_norm": 0.06151897832751274, "learning_rate": 1.8747565833595943e-05, "loss": 0.0172, "step": 62000 }, { "epoch": 0.5050137767758305, "grad_norm": 0.14278863370418549, "learning_rate": 1.8737465558060426e-05, "loss": 0.0166, "step": 62500 }, { "epoch": 0.5090538869900371, "grad_norm": 0.08395873010158539, "learning_rate": 1.872736528252491e-05, "loss": 0.0206, "step": 63000 }, { "epoch": 0.5130939972042438, "grad_norm": 0.09704262018203735, "learning_rate": 1.8717265006989393e-05, "loss": 0.0208, "step": 63500 }, { "epoch": 0.5171341074184503, "grad_norm": 0.06397638469934464, "learning_rate": 1.8707164731453876e-05, "loss": 0.0208, "step": 64000 }, { "epoch": 0.521174217632657, "grad_norm": 0.1525479257106781, "learning_rate": 1.869706445591836e-05, "loss": 0.0207, "step": 64500 }, { "epoch": 0.5252143278468636, "grad_norm": 0.0878639966249466, "learning_rate": 1.8686964180382843e-05, "loss": 0.0198, "step": 65000 }, { "epoch": 0.5292544380610703, "grad_norm": 0.05913593992590904, "learning_rate": 1.8676863904847326e-05, "loss": 0.0166, "step": 65500 }, { "epoch": 0.533294548275277, "grad_norm": 0.05049494653940201, "learning_rate": 1.866676362931181e-05, "loss": 0.0166, "step": 66000 }, { "epoch": 0.5373346584894836, "grad_norm": 0.10428164154291153, "learning_rate": 
1.8656663353776293e-05, "loss": 0.0174, "step": 66500 }, { "epoch": 0.5413747687036903, "grad_norm": 0.08380962908267975, "learning_rate": 1.8646563078240776e-05, "loss": 0.0165, "step": 67000 }, { "epoch": 0.5454148789178969, "grad_norm": 0.12970462441444397, "learning_rate": 1.863646280270526e-05, "loss": 0.0164, "step": 67500 }, { "epoch": 0.5494549891321036, "grad_norm": 0.12594661116600037, "learning_rate": 1.8626362527169743e-05, "loss": 0.0189, "step": 68000 }, { "epoch": 0.5534950993463101, "grad_norm": 0.11368534713983536, "learning_rate": 1.8616262251634226e-05, "loss": 0.019, "step": 68500 }, { "epoch": 0.5575352095605168, "grad_norm": 0.08376836031675339, "learning_rate": 1.860616197609871e-05, "loss": 0.0191, "step": 69000 }, { "epoch": 0.5600158372320397, "eval_f1_macro": 0.9256130258855139, "eval_f1_micro": 0.9668266025133863, "eval_loss": 0.10856343805789948, "eval_precision_macro": 0.9437511235105461, "eval_precision_micro": 0.9695209969914294, "eval_recall_macro": 0.9124750251974688, "eval_recall_micro": 0.9641471425069983, "eval_runtime": 14189.8384, "eval_samples_per_second": 2.791, "eval_steps_per_second": 0.011, "step": 69307 }, { "epoch": 0.5615753197747234, "grad_norm": 0.08000296354293823, "learning_rate": 1.8596061700563193e-05, "loss": 0.0188, "step": 69500 }, { "epoch": 0.5656154299889301, "grad_norm": 0.06347772479057312, "learning_rate": 1.8585961425027677e-05, "loss": 0.0187, "step": 70000 }, { "epoch": 0.5696555402031367, "grad_norm": 0.21189579367637634, "learning_rate": 1.857586114949216e-05, "loss": 0.0196, "step": 70500 }, { "epoch": 0.5736956504173434, "grad_norm": 0.07940568774938583, "learning_rate": 1.8565760873956643e-05, "loss": 0.0196, "step": 71000 }, { "epoch": 0.5777357606315501, "grad_norm": 0.07458707690238953, "learning_rate": 1.8555660598421127e-05, "loss": 0.0199, "step": 71500 }, { "epoch": 0.5817758708457567, "grad_norm": 0.0705709308385849, "learning_rate": 1.854556032288561e-05, "loss": 0.0193, "step": 72000 }, { "epoch": 0.5858159810599634, "grad_norm": 0.13246993720531464, "learning_rate": 1.8535460047350093e-05, "loss": 0.0195, "step": 72500 }, { "epoch": 0.5898560912741699, "grad_norm": 0.08721259236335754, "learning_rate": 1.8525359771814577e-05, "loss": 0.0196, "step": 73000 }, { "epoch": 0.5938962014883766, "grad_norm": 0.07570379972457886, "learning_rate": 1.851525949627906e-05, "loss": 0.0186, "step": 73500 }, { "epoch": 0.5979363117025832, "grad_norm": 0.07477313280105591, "learning_rate": 1.8505159220743543e-05, "loss": 0.0183, "step": 74000 }, { "epoch": 0.6019764219167899, "grad_norm": 0.15558893978595734, "learning_rate": 1.8495058945208027e-05, "loss": 0.0194, "step": 74500 }, { "epoch": 0.6060165321309965, "grad_norm": 0.08373390883207321, "learning_rate": 1.848495866967251e-05, "loss": 0.0189, "step": 75000 }, { "epoch": 0.6100566423452032, "grad_norm": 0.06340883672237396, "learning_rate": 1.8474858394136994e-05, "loss": 0.0139, "step": 75500 }, { "epoch": 0.6140967525594099, "grad_norm": 0.05438007041811943, "learning_rate": 1.8464758118601477e-05, "loss": 0.0148, "step": 76000 }, { "epoch": 0.6181368627736165, "grad_norm": 0.1218661442399025, "learning_rate": 1.845465784306596e-05, "loss": 0.0151, "step": 76500 }, { "epoch": 0.6221769729878232, "grad_norm": 0.0688873752951622, "learning_rate": 1.8444557567530444e-05, "loss": 0.0143, "step": 77000 }, { "epoch": 0.6262170832020297, "grad_norm": 0.058265481144189835, "learning_rate": 1.8434457291994927e-05, "loss": 0.0142, "step": 77500 }, { "epoch": 
0.6302571934162364, "grad_norm": 0.046319037675857544, "learning_rate": 1.842435701645941e-05, "loss": 0.0142, "step": 78000 }, { "epoch": 0.634297303630443, "grad_norm": 0.14100997149944305, "learning_rate": 1.8414256740923894e-05, "loss": 0.0146, "step": 78500 }, { "epoch": 0.6383374138446497, "grad_norm": 0.10154972225427628, "learning_rate": 1.8404156465388377e-05, "loss": 0.0146, "step": 79000 }, { "epoch": 0.6400180996937597, "eval_f1_macro": 0.9268223044157705, "eval_f1_micro": 0.9675749211491975, "eval_loss": 0.1128077358007431, "eval_precision_macro": 0.9500253922065015, "eval_precision_micro": 0.9728413004763068, "eval_recall_macro": 0.9094651623357265, "eval_recall_micro": 0.962365252860739, "eval_runtime": 13980.4006, "eval_samples_per_second": 2.833, "eval_steps_per_second": 0.011, "step": 79208 }, { "epoch": 0.6423775240588563, "grad_norm": 0.1044822484254837, "learning_rate": 1.839405618985286e-05, "loss": 0.0142, "step": 79500 }, { "epoch": 0.646417634273063, "grad_norm": 0.061170101165771484, "learning_rate": 1.8383955914317344e-05, "loss": 0.0142, "step": 80000 }, { "epoch": 0.6504577444872696, "grad_norm": 0.09872958064079285, "learning_rate": 1.8373855638781827e-05, "loss": 0.0174, "step": 80500 }, { "epoch": 0.6544978547014763, "grad_norm": 0.08190814405679703, "learning_rate": 1.836375536324631e-05, "loss": 0.016, "step": 81000 }, { "epoch": 0.658537964915683, "grad_norm": 0.07712013274431229, "learning_rate": 1.8353655087710794e-05, "loss": 0.0172, "step": 81500 }, { "epoch": 0.6625780751298895, "grad_norm": 0.04823287948966026, "learning_rate": 1.8343554812175277e-05, "loss": 0.0168, "step": 82000 }, { "epoch": 0.6666181853440962, "grad_norm": 0.11726228892803192, "learning_rate": 1.833345453663976e-05, "loss": 0.017, "step": 82500 }, { "epoch": 0.6706582955583028, "grad_norm": 0.06535898894071579, "learning_rate": 1.8323354261104244e-05, "loss": 0.016, "step": 83000 }, { "epoch": 0.6746984057725095, "grad_norm": 0.05892045795917511, "learning_rate": 1.8313253985568727e-05, "loss": 0.0159, "step": 83500 }, { "epoch": 0.6787385159867161, "grad_norm": 0.04444234445691109, "learning_rate": 1.830315371003321e-05, "loss": 0.0153, "step": 84000 }, { "epoch": 0.6827786262009228, "grad_norm": 0.1465209275484085, "learning_rate": 1.8293053434497694e-05, "loss": 0.0156, "step": 84500 }, { "epoch": 0.6868187364151294, "grad_norm": 0.11835352331399918, "learning_rate": 1.8282953158962178e-05, "loss": 0.0154, "step": 85000 }, { "epoch": 0.690858846629336, "grad_norm": 0.05793392285704613, "learning_rate": 1.827285288342666e-05, "loss": 0.0138, "step": 85500 }, { "epoch": 0.6948989568435426, "grad_norm": 0.045407455414533615, "learning_rate": 1.8262752607891144e-05, "loss": 0.0133, "step": 86000 }, { "epoch": 0.6989390670577493, "grad_norm": 0.12997862696647644, "learning_rate": 1.8252652332355628e-05, "loss": 0.0141, "step": 86500 }, { "epoch": 0.702979177271956, "grad_norm": 0.07040946930646896, "learning_rate": 1.824255205682011e-05, "loss": 0.0138, "step": 87000 }, { "epoch": 0.7070192874861626, "grad_norm": 0.05935658514499664, "learning_rate": 1.8232451781284594e-05, "loss": 0.0144, "step": 87500 }, { "epoch": 0.7110593977003693, "grad_norm": 0.0425080843269825, "learning_rate": 1.8222351505749078e-05, "loss": 0.018, "step": 88000 }, { "epoch": 0.7150995079145759, "grad_norm": 0.1149262934923172, "learning_rate": 1.821225123021356e-05, "loss": 0.0177, "step": 88500 }, { "epoch": 0.7191396181287826, "grad_norm": 0.08022065460681915, "learning_rate": 1.8202150954678045e-05, 
"loss": 0.0172, "step": 89000 }, { "epoch": 0.7200203621554796, "eval_f1_macro": 0.9342654788805338, "eval_f1_micro": 0.9723695526241013, "eval_loss": 0.09522199630737305, "eval_precision_macro": 0.9684740304119624, "eval_precision_micro": 0.9878875975715066, "eval_recall_macro": 0.9056287281769387, "eval_recall_micro": 0.9573314924580955, "eval_runtime": 13779.2013, "eval_samples_per_second": 2.874, "eval_steps_per_second": 0.011, "step": 89109 }, { "epoch": 0.7231797283429892, "grad_norm": 0.06360196322202682, "learning_rate": 1.8192050679142528e-05, "loss": 0.0167, "step": 89500 }, { "epoch": 0.7272198385571959, "grad_norm": 0.0733686089515686, "learning_rate": 1.818195040360701e-05, "loss": 0.0179, "step": 90000 }, { "epoch": 0.7312599487714024, "grad_norm": 0.1344570368528366, "learning_rate": 1.8171850128071495e-05, "loss": 0.0256, "step": 90500 }, { "epoch": 0.7353000589856091, "grad_norm": 0.0946430116891861, "learning_rate": 1.8161749852535978e-05, "loss": 0.026, "step": 91000 }, { "epoch": 0.7393401691998158, "grad_norm": 0.07514828443527222, "learning_rate": 1.815164957700046e-05, "loss": 0.0251, "step": 91500 }, { "epoch": 0.7433802794140224, "grad_norm": 0.06544400006532669, "learning_rate": 1.8141549301464945e-05, "loss": 0.0247, "step": 92000 }, { "epoch": 0.7474203896282291, "grad_norm": 0.11973392963409424, "learning_rate": 1.8131449025929428e-05, "loss": 0.0242, "step": 92500 }, { "epoch": 0.7514604998424357, "grad_norm": 0.07870098203420639, "learning_rate": 1.812134875039391e-05, "loss": 0.0197, "step": 93000 }, { "epoch": 0.7555006100566424, "grad_norm": 0.06315948814153671, "learning_rate": 1.8111248474858395e-05, "loss": 0.0189, "step": 93500 }, { "epoch": 0.759540720270849, "grad_norm": 0.05281440541148186, "learning_rate": 1.810114819932288e-05, "loss": 0.0183, "step": 94000 }, { "epoch": 0.7635808304850557, "grad_norm": 0.11212711036205292, "learning_rate": 1.809104792378736e-05, "loss": 0.0189, "step": 94500 }, { "epoch": 0.7676209406992622, "grad_norm": 0.13350194692611694, "learning_rate": 1.8080947648251845e-05, "loss": 0.019, "step": 95000 }, { "epoch": 0.7716610509134689, "grad_norm": 0.06391710788011551, "learning_rate": 1.8070847372716328e-05, "loss": 0.0176, "step": 95500 }, { "epoch": 0.7757011611276755, "grad_norm": 0.06272578239440918, "learning_rate": 1.806074709718081e-05, "loss": 0.0175, "step": 96000 }, { "epoch": 0.7797412713418822, "grad_norm": 0.10559968650341034, "learning_rate": 1.80506468216453e-05, "loss": 0.0166, "step": 96500 }, { "epoch": 0.7837813815560889, "grad_norm": 0.10264132171869278, "learning_rate": 1.804054654610978e-05, "loss": 0.0167, "step": 97000 }, { "epoch": 0.7878214917702955, "grad_norm": 0.06299474835395813, "learning_rate": 1.8030446270574262e-05, "loss": 0.0171, "step": 97500 }, { "epoch": 0.7918616019845022, "grad_norm": 0.052749671041965485, "learning_rate": 1.8020345995038745e-05, "loss": 0.0182, "step": 98000 }, { "epoch": 0.7959017121987088, "grad_norm": 0.14064335823059082, "learning_rate": 1.801024571950323e-05, "loss": 0.0186, "step": 98500 }, { "epoch": 0.7999418224129154, "grad_norm": 0.1007775291800499, "learning_rate": 1.8000145443967715e-05, "loss": 0.0183, "step": 99000 }, { "epoch": 0.8000226246171995, "eval_f1_macro": 0.9428325512898811, "eval_f1_micro": 0.9730330895777283, "eval_loss": 0.08679112046957016, "eval_precision_macro": 0.9804326754765803, "eval_precision_micro": 0.9888949040358451, "eval_recall_macro": 0.9118424578887557, "eval_recall_micro": 0.957672087213271, "eval_runtime": 13633.7127, 
"eval_samples_per_second": 2.905, "eval_steps_per_second": 0.011, "step": 99010 }, { "epoch": 0.803981932627122, "grad_norm": 0.06079207360744476, "learning_rate": 1.7990045168432195e-05, "loss": 0.0179, "step": 99500 }, { "epoch": 0.8080220428413287, "grad_norm": 0.08171634376049042, "learning_rate": 1.797994489289668e-05, "loss": 0.0174, "step": 100000 }, { "epoch": 0.8120621530555353, "grad_norm": 0.11801985651254654, "learning_rate": 1.7969844617361162e-05, "loss": 0.018, "step": 100500 }, { "epoch": 0.816102263269742, "grad_norm": 0.07442731410264969, "learning_rate": 1.7959744341825645e-05, "loss": 0.0193, "step": 101000 }, { "epoch": 0.8201423734839487, "grad_norm": 0.07479513436555862, "learning_rate": 1.7949644066290132e-05, "loss": 0.0182, "step": 101500 }, { "epoch": 0.8241824836981553, "grad_norm": 0.07510875165462494, "learning_rate": 1.7939543790754612e-05, "loss": 0.0187, "step": 102000 }, { "epoch": 0.828222593912362, "grad_norm": 0.12816323339939117, "learning_rate": 1.7929443515219095e-05, "loss": 0.018, "step": 102500 }, { "epoch": 0.8322627041265686, "grad_norm": 0.1283213347196579, "learning_rate": 1.791934323968358e-05, "loss": 0.017, "step": 103000 }, { "epoch": 0.8363028143407752, "grad_norm": 0.06121571362018585, "learning_rate": 1.7909242964148062e-05, "loss": 0.0169, "step": 103500 }, { "epoch": 0.8403429245549818, "grad_norm": 0.05697647109627724, "learning_rate": 1.789914268861255e-05, "loss": 0.0165, "step": 104000 }, { "epoch": 0.8443830347691885, "grad_norm": 0.12682537734508514, "learning_rate": 1.7889042413077032e-05, "loss": 0.0166, "step": 104500 }, { "epoch": 0.8484231449833951, "grad_norm": 0.0857871025800705, "learning_rate": 1.7878942137541512e-05, "loss": 0.0173, "step": 105000 }, { "epoch": 0.8524632551976018, "grad_norm": 0.06892874091863632, "learning_rate": 1.7868841862005996e-05, "loss": 0.0171, "step": 105500 }, { "epoch": 0.8565033654118084, "grad_norm": 0.04709647595882416, "learning_rate": 1.785874158647048e-05, "loss": 0.0159, "step": 106000 }, { "epoch": 0.8605434756260151, "grad_norm": 0.10291819274425507, "learning_rate": 1.7848641310934966e-05, "loss": 0.0165, "step": 106500 }, { "epoch": 0.8645835858402218, "grad_norm": 0.0879896804690361, "learning_rate": 1.783854103539945e-05, "loss": 0.0162, "step": 107000 }, { "epoch": 0.8686236960544284, "grad_norm": 0.06169717013835907, "learning_rate": 1.782844075986393e-05, "loss": 0.0158, "step": 107500 }, { "epoch": 0.872663806268635, "grad_norm": 0.05489352345466614, "learning_rate": 1.7818340484328413e-05, "loss": 0.0165, "step": 108000 }, { "epoch": 0.8767039164828416, "grad_norm": 0.14040745794773102, "learning_rate": 1.7808240208792896e-05, "loss": 0.0172, "step": 108500 }, { "epoch": 0.8800248870789195, "eval_f1_macro": 0.9452519445016722, "eval_f1_micro": 0.9735904566562644, "eval_loss": 0.09658095985651016, "eval_precision_macro": 0.9848024256701463, "eval_precision_micro": 0.9916469998618024, "eval_recall_macro": 0.9121362312839789, "eval_recall_micro": 0.956179724302381, "eval_runtime": 13866.6963, "eval_samples_per_second": 2.856, "eval_steps_per_second": 0.011, "step": 108911 }, { "epoch": 0.8807440266970483, "grad_norm": 0.10508357733488083, "learning_rate": 1.7798139933257383e-05, "loss": 0.0162, "step": 109000 }, { "epoch": 0.8847841369112549, "grad_norm": 0.06252790987491608, "learning_rate": 1.7788039657721866e-05, "loss": 0.0164, "step": 109500 }, { "epoch": 0.8888242471254616, "grad_norm": 0.04974674433469772, "learning_rate": 1.7777939382186346e-05, "loss": 0.0165, 
"step": 110000 }, { "epoch": 0.8928643573396682, "grad_norm": 0.11918849498033524, "learning_rate": 1.776783910665083e-05, "loss": 0.0174, "step": 110500 }, { "epoch": 0.8969044675538749, "grad_norm": 0.12928660213947296, "learning_rate": 1.7757738831115313e-05, "loss": 0.017, "step": 111000 }, { "epoch": 0.9009445777680816, "grad_norm": 0.06852889806032181, "learning_rate": 1.77476385555798e-05, "loss": 0.0163, "step": 111500 }, { "epoch": 0.9049846879822881, "grad_norm": 0.0549907386302948, "learning_rate": 1.7737538280044283e-05, "loss": 0.0173, "step": 112000 }, { "epoch": 0.9090247981964948, "grad_norm": 0.12298522889614105, "learning_rate": 1.7727438004508763e-05, "loss": 0.0169, "step": 112500 }, { "epoch": 0.9130649084107014, "grad_norm": 0.09733408689498901, "learning_rate": 1.7717337728973246e-05, "loss": 0.0177, "step": 113000 }, { "epoch": 0.9171050186249081, "grad_norm": 0.07251332700252533, "learning_rate": 1.770723745343773e-05, "loss": 0.0176, "step": 113500 }, { "epoch": 0.9211451288391147, "grad_norm": 0.07106909155845642, "learning_rate": 1.7697137177902216e-05, "loss": 0.0174, "step": 114000 }, { "epoch": 0.9251852390533214, "grad_norm": 0.1281566470861435, "learning_rate": 1.76870369023667e-05, "loss": 0.0176, "step": 114500 }, { "epoch": 0.929225349267528, "grad_norm": 0.09204866737127304, "learning_rate": 1.767693662683118e-05, "loss": 0.0171, "step": 115000 }, { "epoch": 0.9332654594817347, "grad_norm": 0.05850633978843689, "learning_rate": 1.7666836351295663e-05, "loss": 0.0151, "step": 115500 }, { "epoch": 0.9373055696959413, "grad_norm": 0.044992174953222275, "learning_rate": 1.7656736075760146e-05, "loss": 0.0147, "step": 116000 }, { "epoch": 0.941345679910148, "grad_norm": 0.10752815753221512, "learning_rate": 1.7646635800224633e-05, "loss": 0.0155, "step": 116500 }, { "epoch": 0.9453857901243546, "grad_norm": 0.09021549671888351, "learning_rate": 1.7636535524689117e-05, "loss": 0.0154, "step": 117000 }, { "epoch": 0.9494259003385612, "grad_norm": 0.0689893364906311, "learning_rate": 1.7626435249153596e-05, "loss": 0.0158, "step": 117500 }, { "epoch": 0.9534660105527679, "grad_norm": 0.06845594197511673, "learning_rate": 1.761633497361808e-05, "loss": 0.021, "step": 118000 }, { "epoch": 0.9575061207669745, "grad_norm": 0.11164365708827972, "learning_rate": 1.7606234698082563e-05, "loss": 0.0209, "step": 118500 }, { "epoch": 0.9600271495406395, "eval_f1_macro": 0.9436502450372893, "eval_f1_micro": 0.9732083745347168, "eval_loss": 0.0801812931895256, "eval_precision_macro": 0.9811968490819049, "eval_precision_micro": 0.9878954264431127, "eval_recall_macro": 0.9131012924150136, "eval_recall_micro": 0.9589516303534508, "eval_runtime": 13850.1368, "eval_samples_per_second": 2.859, "eval_steps_per_second": 0.011, "step": 118812 }, { "epoch": 0.9615462309811812, "grad_norm": 0.0935693234205246, "learning_rate": 1.759613442254705e-05, "loss": 0.0202, "step": 119000 }, { "epoch": 0.9655863411953878, "grad_norm": 0.06598909944295883, "learning_rate": 1.7586034147011533e-05, "loss": 0.02, "step": 119500 }, { "epoch": 0.9696264514095945, "grad_norm": 0.052590906620025635, "learning_rate": 1.7575933871476017e-05, "loss": 0.0203, "step": 120000 }, { "epoch": 0.973666561623801, "grad_norm": 0.1306983232498169, "learning_rate": 1.7565833595940497e-05, "loss": 0.0152, "step": 120500 }, { "epoch": 0.9777066718380077, "grad_norm": 0.07255972176790237, "learning_rate": 1.7555733320404983e-05, "loss": 0.0156, "step": 121000 }, { "epoch": 0.9817467820522144, "grad_norm": 
0.0558183416724205, "learning_rate": 1.7545633044869467e-05, "loss": 0.0149, "step": 121500 }, { "epoch": 0.985786892266421, "grad_norm": 0.04536261036992073, "learning_rate": 1.753553276933395e-05, "loss": 0.0145, "step": 122000 }, { "epoch": 0.9898270024806277, "grad_norm": 0.12242696434259415, "learning_rate": 1.7525432493798434e-05, "loss": 0.0148, "step": 122500 }, { "epoch": 0.9938671126948343, "grad_norm": 0.09054296463727951, "learning_rate": 1.7515332218262914e-05, "loss": 0.0184, "step": 123000 }, { "epoch": 0.997907222909041, "grad_norm": 0.0703011155128479, "learning_rate": 1.75052319427274e-05, "loss": 0.0187, "step": 123500 }, { "epoch": 1.0019473331232476, "grad_norm": 0.06889301538467407, "learning_rate": 1.7495131667191884e-05, "loss": 0.0186, "step": 124000 }, { "epoch": 1.0059874433374543, "grad_norm": 0.1129370704293251, "learning_rate": 1.7485031391656367e-05, "loss": 0.0185, "step": 124500 }, { "epoch": 1.010027553551661, "grad_norm": 0.0729982927441597, "learning_rate": 1.747493111612085e-05, "loss": 0.0181, "step": 125000 }, { "epoch": 1.0140676637658674, "grad_norm": 0.19092483818531036, "learning_rate": 1.746483084058533e-05, "loss": 0.0167, "step": 125500 }, { "epoch": 1.0181077739800741, "grad_norm": 0.04695465415716171, "learning_rate": 1.7454730565049817e-05, "loss": 0.0168, "step": 126000 }, { "epoch": 1.0221478841942808, "grad_norm": 0.1297185719013214, "learning_rate": 1.74446302895143e-05, "loss": 0.0168, "step": 126500 }, { "epoch": 1.0261879944084875, "grad_norm": 0.07326006889343262, "learning_rate": 1.7434530013978784e-05, "loss": 0.0174, "step": 127000 }, { "epoch": 1.0302281046226942, "grad_norm": 0.0644180178642273, "learning_rate": 1.7424429738443267e-05, "loss": 0.0169, "step": 127500 }, { "epoch": 1.0342682148369007, "grad_norm": 0.04816208407282829, "learning_rate": 1.7414329462907747e-05, "loss": 0.0146, "step": 128000 }, { "epoch": 1.0383083250511074, "grad_norm": 0.09492602199316025, "learning_rate": 1.7404229187372234e-05, "loss": 0.0146, "step": 128500 }, { "epoch": 1.0400294120023594, "eval_f1_macro": 0.9447347774982361, "eval_f1_micro": 0.9738792089577321, "eval_loss": 0.09039987623691559, "eval_precision_macro": 0.9845539397176223, "eval_precision_micro": 0.9913320028997217, "eval_recall_macro": 0.9116629602052654, "eval_recall_micro": 0.9570303099541577, "eval_runtime": 13620.1841, "eval_samples_per_second": 2.908, "eval_steps_per_second": 0.011, "step": 128713 }, { "epoch": 1.042348435265314, "grad_norm": 0.08992265909910202, "learning_rate": 1.7394128911836717e-05, "loss": 0.014, "step": 129000 }, { "epoch": 1.0463885454795208, "grad_norm": 0.05333436280488968, "learning_rate": 1.73840286363012e-05, "loss": 0.015, "step": 129500 }, { "epoch": 1.0504286556937272, "grad_norm": 0.057117633521556854, "learning_rate": 1.7373928360765684e-05, "loss": 0.0144, "step": 130000 }, { "epoch": 1.054468765907934, "grad_norm": 0.12276995927095413, "learning_rate": 1.7363828085230164e-05, "loss": 0.0228, "step": 130500 }, { "epoch": 1.0585088761221406, "grad_norm": 0.08618568629026413, "learning_rate": 1.735372780969465e-05, "loss": 0.0229, "step": 131000 }, { "epoch": 1.0625489863363473, "grad_norm": 0.08783124387264252, "learning_rate": 1.7343627534159134e-05, "loss": 0.0222, "step": 131500 }, { "epoch": 1.066589096550554, "grad_norm": 0.06352981925010681, "learning_rate": 1.7333527258623618e-05, "loss": 0.0221, "step": 132000 }, { "epoch": 1.0706292067647605, "grad_norm": 0.10115523636341095, "learning_rate": 1.73234269830881e-05, "loss": 0.0226, 
"step": 132500 }, { "epoch": 1.0746693169789672, "grad_norm": 0.11306885629892349, "learning_rate": 1.731332670755258e-05, "loss": 0.0162, "step": 133000 }, { "epoch": 1.0787094271931739, "grad_norm": 0.05852317065000534, "learning_rate": 1.7303226432017068e-05, "loss": 0.0155, "step": 133500 }, { "epoch": 1.0827495374073806, "grad_norm": 0.046473681926727295, "learning_rate": 1.729312615648155e-05, "loss": 0.0149, "step": 134000 }, { "epoch": 1.086789647621587, "grad_norm": 0.11023978888988495, "learning_rate": 1.7283025880946034e-05, "loss": 0.0152, "step": 134500 }, { "epoch": 1.0908297578357937, "grad_norm": 0.07801781594753265, "learning_rate": 1.7272925605410518e-05, "loss": 0.0145, "step": 135000 }, { "epoch": 1.0948698680500004, "grad_norm": 0.057179663330316544, "learning_rate": 1.7262825329875e-05, "loss": 0.0167, "step": 135500 }, { "epoch": 1.098909978264207, "grad_norm": 0.0559101440012455, "learning_rate": 1.7252725054339484e-05, "loss": 0.0174, "step": 136000 }, { "epoch": 1.1029500884784136, "grad_norm": 0.08359610289335251, "learning_rate": 1.7242624778803968e-05, "loss": 0.0171, "step": 136500 }, { "epoch": 1.1069901986926203, "grad_norm": 0.11296004056930542, "learning_rate": 1.723252450326845e-05, "loss": 0.0172, "step": 137000 }, { "epoch": 1.111030308906827, "grad_norm": 0.061936188489198685, "learning_rate": 1.7222424227732935e-05, "loss": 0.0173, "step": 137500 }, { "epoch": 1.1150704191210337, "grad_norm": 0.07334394752979279, "learning_rate": 1.7212323952197418e-05, "loss": 0.0171, "step": 138000 }, { "epoch": 1.1191105293352404, "grad_norm": 0.10479886829853058, "learning_rate": 1.72022236766619e-05, "loss": 0.0167, "step": 138500 }, { "epoch": 1.1200316744640795, "eval_f1_macro": 0.9448181357644012, "eval_f1_micro": 0.9737656699358889, "eval_loss": 0.07901577651500702, "eval_precision_macro": 0.9836772488732869, "eval_precision_micro": 0.9897679811194957, "eval_recall_macro": 0.9127768967177174, "eval_recall_micro": 0.9582725683902614, "eval_runtime": 13749.2598, "eval_samples_per_second": 2.88, "eval_steps_per_second": 0.011, "step": 138614 }, { "epoch": 1.1231506395494468, "grad_norm": 0.07909916341304779, "learning_rate": 1.7192123401126385e-05, "loss": 0.0161, "step": 139000 }, { "epoch": 1.1271907497636535, "grad_norm": 0.08238150179386139, "learning_rate": 1.7182023125590868e-05, "loss": 0.017, "step": 139500 }, { "epoch": 1.1312308599778602, "grad_norm": 0.06267368793487549, "learning_rate": 1.717192285005535e-05, "loss": 0.0164, "step": 140000 }, { "epoch": 1.135270970192067, "grad_norm": 0.11608216911554337, "learning_rate": 1.7161822574519835e-05, "loss": 0.0165, "step": 140500 }, { "epoch": 1.1393110804062734, "grad_norm": 0.10431836545467377, "learning_rate": 1.7151722298984318e-05, "loss": 0.0161, "step": 141000 }, { "epoch": 1.14335119062048, "grad_norm": 0.06495651602745056, "learning_rate": 1.71416220234488e-05, "loss": 0.0165, "step": 141500 }, { "epoch": 1.1473913008346868, "grad_norm": 0.04861852526664734, "learning_rate": 1.7131521747913285e-05, "loss": 0.0163, "step": 142000 }, { "epoch": 1.1514314110488935, "grad_norm": 0.17824631929397583, "learning_rate": 1.7121421472377768e-05, "loss": 0.0159, "step": 142500 }, { "epoch": 1.1554715212631002, "grad_norm": 0.08877791464328766, "learning_rate": 1.711132119684225e-05, "loss": 0.0171, "step": 143000 }, { "epoch": 1.1595116314773066, "grad_norm": 0.06289026886224747, "learning_rate": 1.7101220921306735e-05, "loss": 0.0166, "step": 143500 }, { "epoch": 1.1635517416915133, "grad_norm": 
0.0498519092798233, "learning_rate": 1.709112064577122e-05, "loss": 0.0169, "step": 144000 }, { "epoch": 1.16759185190572, "grad_norm": 0.13069184124469757, "learning_rate": 1.7081020370235702e-05, "loss": 0.0168, "step": 144500 }, { "epoch": 1.1716319621199267, "grad_norm": 0.09042539447546005, "learning_rate": 1.7070920094700185e-05, "loss": 0.0168, "step": 145000 }, { "epoch": 1.1756720723341332, "grad_norm": 0.05690092593431473, "learning_rate": 1.706081981916467e-05, "loss": 0.0166, "step": 145500 }, { "epoch": 1.1797121825483399, "grad_norm": 0.0493723563849926, "learning_rate": 1.7050719543629152e-05, "loss": 0.017, "step": 146000 }, { "epoch": 1.1837522927625466, "grad_norm": 0.10125371068716049, "learning_rate": 1.7040619268093635e-05, "loss": 0.0165, "step": 146500 }, { "epoch": 1.1877924029767533, "grad_norm": 0.0926498994231224, "learning_rate": 1.703051899255812e-05, "loss": 0.0163, "step": 147000 }, { "epoch": 1.19183251319096, "grad_norm": 0.06617089360952377, "learning_rate": 1.7020418717022602e-05, "loss": 0.0168, "step": 147500 }, { "epoch": 1.1958726234051664, "grad_norm": 0.05541488900780678, "learning_rate": 1.7010318441487085e-05, "loss": 0.0192, "step": 148000 }, { "epoch": 1.1999127336193731, "grad_norm": 0.12656770646572113, "learning_rate": 1.700021816595157e-05, "loss": 0.0193, "step": 148500 }, { "epoch": 1.2000339369257993, "eval_f1_macro": 0.9460294234862895, "eval_f1_micro": 0.9741125567825796, "eval_loss": 0.08067350834608078, "eval_precision_macro": 0.986194906743568, "eval_precision_micro": 0.9906935766072956, "eval_recall_macro": 0.9129872709919727, "eval_recall_micro": 0.9580774262702744, "eval_runtime": 13826.0506, "eval_samples_per_second": 2.864, "eval_steps_per_second": 0.011, "step": 148515 }, { "epoch": 1.2039528438335798, "grad_norm": 0.10946424305438995, "learning_rate": 1.6990117890416052e-05, "loss": 0.019, "step": 149000 }, { "epoch": 1.2079929540477865, "grad_norm": 0.05134887993335724, "learning_rate": 1.6980017614880535e-05, "loss": 0.0177, "step": 149500 }, { "epoch": 1.212033064261993, "grad_norm": 0.08791927248239517, "learning_rate": 1.696991733934502e-05, "loss": 0.0188, "step": 150000 }, { "epoch": 1.2160731744761997, "grad_norm": 0.11116321384906769, "learning_rate": 1.6959817063809502e-05, "loss": 0.014, "step": 150500 }, { "epoch": 1.2201132846904064, "grad_norm": 0.07135743647813797, "learning_rate": 1.6949716788273986e-05, "loss": 0.0133, "step": 151000 }, { "epoch": 1.224153394904613, "grad_norm": 0.06051028147339821, "learning_rate": 1.693961651273847e-05, "loss": 0.014, "step": 151500 }, { "epoch": 1.2281935051188198, "grad_norm": 0.05637380853295326, "learning_rate": 1.6929516237202952e-05, "loss": 0.0136, "step": 152000 }, { "epoch": 1.2322336153330262, "grad_norm": 0.2139320969581604, "learning_rate": 1.6919415961667436e-05, "loss": 0.014, "step": 152500 }, { "epoch": 1.236273725547233, "grad_norm": 0.10385521501302719, "learning_rate": 1.690931568613192e-05, "loss": 0.0136, "step": 153000 }, { "epoch": 1.2403138357614396, "grad_norm": 0.052428074181079865, "learning_rate": 1.6899215410596402e-05, "loss": 0.0143, "step": 153500 }, { "epoch": 1.2443539459756463, "grad_norm": 0.0810508131980896, "learning_rate": 1.6889115135060886e-05, "loss": 0.0136, "step": 154000 }, { "epoch": 1.2483940561898528, "grad_norm": 0.1127280592918396, "learning_rate": 1.687901485952537e-05, "loss": 0.0131, "step": 154500 }, { "epoch": 1.2524341664040595, "grad_norm": 0.0869458019733429, "learning_rate": 1.6868914583989852e-05, "loss": 0.0134, 
"step": 155000 }, { "epoch": 1.2564742766182662, "grad_norm": 0.055589742958545685, "learning_rate": 1.6858814308454336e-05, "loss": 0.0131, "step": 155500 }, { "epoch": 1.2605143868324729, "grad_norm": 0.07655055820941925, "learning_rate": 1.684871403291882e-05, "loss": 0.0133, "step": 156000 }, { "epoch": 1.2645544970466793, "grad_norm": 0.10124019533395767, "learning_rate": 1.6838613757383303e-05, "loss": 0.0134, "step": 156500 }, { "epoch": 1.268594607260886, "grad_norm": 0.06868778169155121, "learning_rate": 1.6828513481847786e-05, "loss": 0.0131, "step": 157000 }, { "epoch": 1.2726347174750927, "grad_norm": 0.05508118122816086, "learning_rate": 1.681841320631227e-05, "loss": 0.013, "step": 157500 }, { "epoch": 1.2766748276892994, "grad_norm": 0.061807744204998016, "learning_rate": 1.6808312930776753e-05, "loss": 0.0165, "step": 158000 }, { "epoch": 1.2800361993875193, "eval_f1_macro": 0.9468881762987015, "eval_f1_micro": 0.974322016191991, "eval_loss": 0.08317849040031433, "eval_precision_macro": 0.9867617789603411, "eval_precision_micro": 0.9924250039485718, "eval_recall_macro": 0.9135532474499787, "eval_recall_micro": 0.9568676362293653, "eval_runtime": 13523.4341, "eval_samples_per_second": 2.928, "eval_steps_per_second": 0.011, "step": 158416 }, { "epoch": 1.280714937903506, "grad_norm": 0.14820145070552826, "learning_rate": 1.6798212655241236e-05, "loss": 0.0154, "step": 158500 }, { "epoch": 1.2847550481177126, "grad_norm": 0.066920705139637, "learning_rate": 1.678811237970572e-05, "loss": 0.0165, "step": 159000 }, { "epoch": 1.2887951583319193, "grad_norm": 0.05135662853717804, "learning_rate": 1.6778012104170203e-05, "loss": 0.0157, "step": 159500 }, { "epoch": 1.292835268546126, "grad_norm": 0.0481293685734272, "learning_rate": 1.6767911828634686e-05, "loss": 0.0157, "step": 160000 }, { "epoch": 1.2968753787603327, "grad_norm": 0.11119942367076874, "learning_rate": 1.675781155309917e-05, "loss": 0.0175, "step": 160500 }, { "epoch": 1.3009154889745391, "grad_norm": 0.10568433254957199, "learning_rate": 1.6747711277563653e-05, "loss": 0.0195, "step": 161000 }, { "epoch": 1.3049555991887458, "grad_norm": 0.070424385368824, "learning_rate": 1.6737611002028136e-05, "loss": 0.0187, "step": 161500 }, { "epoch": 1.3089957094029525, "grad_norm": 0.055738966912031174, "learning_rate": 1.672751072649262e-05, "loss": 0.0178, "step": 162000 }, { "epoch": 1.3130358196171592, "grad_norm": 0.13051150739192963, "learning_rate": 1.6717410450957103e-05, "loss": 0.0184, "step": 162500 }, { "epoch": 1.317075929831366, "grad_norm": 0.07910241186618805, "learning_rate": 1.6707310175421586e-05, "loss": 0.0155, "step": 163000 }, { "epoch": 1.3211160400455724, "grad_norm": 0.15667231380939484, "learning_rate": 1.669720989988607e-05, "loss": 0.0156, "step": 163500 }, { "epoch": 1.325156150259779, "grad_norm": 0.1987818032503128, "learning_rate": 1.6687109624350553e-05, "loss": 0.0152, "step": 164000 }, { "epoch": 1.3291962604739858, "grad_norm": 0.13924378156661987, "learning_rate": 1.6677009348815036e-05, "loss": 0.0149, "step": 164500 }, { "epoch": 1.3332363706881925, "grad_norm": 0.07680565118789673, "learning_rate": 1.666690907327952e-05, "loss": 0.0152, "step": 165000 }, { "epoch": 1.337276480902399, "grad_norm": 0.10616718977689743, "learning_rate": 1.6656808797744003e-05, "loss": 0.0248, "step": 165500 }, { "epoch": 1.3413165911166056, "grad_norm": 0.14228446781635284, "learning_rate": 1.6646708522208487e-05, "loss": 0.0255, "step": 166000 }, { "epoch": 1.3453567013308123, "grad_norm": 
0.12593576312065125, "learning_rate": 1.6636608246672973e-05, "loss": 0.0249, "step": 166500 }, { "epoch": 1.349396811545019, "grad_norm": 0.14932659268379211, "learning_rate": 1.6626507971137453e-05, "loss": 0.0253, "step": 167000 }, { "epoch": 1.3534369217592257, "grad_norm": 0.09529467672109604, "learning_rate": 1.6616407695601937e-05, "loss": 0.0248, "step": 167500 }, { "epoch": 1.3574770319734322, "grad_norm": 0.048431217670440674, "learning_rate": 1.660630742006642e-05, "loss": 0.0151, "step": 168000 }, { "epoch": 1.3600384618492392, "eval_f1_macro": 0.9470943315331984, "eval_f1_micro": 0.9744685617640599, "eval_loss": 0.08101344108581543, "eval_precision_macro": 0.9875637466039148, "eval_precision_micro": 0.9922827909185198, "eval_recall_macro": 0.9134025250498142, "eval_recall_micro": 0.957282681677187, "eval_runtime": 13286.0274, "eval_samples_per_second": 2.981, "eval_steps_per_second": 0.012, "step": 168317 }, { "epoch": 1.3615171421876389, "grad_norm": 0.10621971637010574, "learning_rate": 1.6596207144530903e-05, "loss": 0.0152, "step": 168500 }, { "epoch": 1.3655572524018456, "grad_norm": 0.07011255621910095, "learning_rate": 1.658610686899539e-05, "loss": 0.015, "step": 169000 }, { "epoch": 1.3695973626160522, "grad_norm": 0.05363575369119644, "learning_rate": 1.657600659345987e-05, "loss": 0.0148, "step": 169500 }, { "epoch": 1.3736374728302587, "grad_norm": 0.14870333671569824, "learning_rate": 1.6565906317924354e-05, "loss": 0.0148, "step": 170000 }, { "epoch": 1.3776775830444654, "grad_norm": 0.11409811675548553, "learning_rate": 1.6555806042388837e-05, "loss": 0.0158, "step": 170500 }, { "epoch": 1.381717693258672, "grad_norm": 0.11164900660514832, "learning_rate": 1.654570576685332e-05, "loss": 0.0148, "step": 171000 }, { "epoch": 1.3857578034728788, "grad_norm": 0.08794820308685303, "learning_rate": 1.6535605491317807e-05, "loss": 0.0158, "step": 171500 }, { "epoch": 1.3897979136870853, "grad_norm": 0.060815006494522095, "learning_rate": 1.6525505215782287e-05, "loss": 0.0145, "step": 172000 }, { "epoch": 1.393838023901292, "grad_norm": 0.12906509637832642, "learning_rate": 1.651540494024677e-05, "loss": 0.0153, "step": 172500 }, { "epoch": 1.3978781341154987, "grad_norm": 0.09560517966747284, "learning_rate": 1.6505304664711254e-05, "loss": 0.026, "step": 173000 }, { "epoch": 1.4019182443297054, "grad_norm": 0.05908598750829697, "learning_rate": 1.6495204389175737e-05, "loss": 0.0253, "step": 173500 }, { "epoch": 1.405958354543912, "grad_norm": 0.06017552688717842, "learning_rate": 1.6485104113640224e-05, "loss": 0.0248, "step": 174000 }, { "epoch": 1.4099984647581185, "grad_norm": 0.10513614118099213, "learning_rate": 1.6475003838104704e-05, "loss": 0.0243, "step": 174500 }, { "epoch": 1.4140385749723252, "grad_norm": 0.08137038350105286, "learning_rate": 1.6464903562569187e-05, "loss": 0.0243, "step": 175000 }, { "epoch": 1.418078685186532, "grad_norm": 0.07494989782571793, "learning_rate": 1.645480328703367e-05, "loss": 0.0202, "step": 175500 }, { "epoch": 1.4221187954007386, "grad_norm": 0.05562291666865349, "learning_rate": 1.6444703011498154e-05, "loss": 0.0204, "step": 176000 }, { "epoch": 1.426158905614945, "grad_norm": 0.11044422537088394, "learning_rate": 1.643460273596264e-05, "loss": 0.0202, "step": 176500 }, { "epoch": 1.4301990158291518, "grad_norm": 0.11972752958536148, "learning_rate": 1.642450246042712e-05, "loss": 0.0195, "step": 177000 }, { "epoch": 1.4342391260433585, "grad_norm": 0.06898529082536697, "learning_rate": 1.6414402184891604e-05, 
"loss": 0.0203, "step": 177500 }, { "epoch": 1.4382792362575652, "grad_norm": 0.05580909922719002, "learning_rate": 1.6404301909356087e-05, "loss": 0.0124, "step": 178000 }, { "epoch": 1.4400407243109592, "eval_f1_macro": 0.9458974211933513, "eval_f1_micro": 0.974213850978252, "eval_loss": 0.09569641947746277, "eval_precision_macro": 0.9869461304954816, "eval_precision_micro": 0.9919091180407337, "eval_recall_macro": 0.9122157060173365, "eval_recall_micro": 0.9571388713888294, "eval_runtime": 13113.6746, "eval_samples_per_second": 3.02, "eval_steps_per_second": 0.012, "step": 178218 }, { "epoch": 1.4423193464717718, "grad_norm": 0.09399819374084473, "learning_rate": 1.639420163382057e-05, "loss": 0.0116, "step": 178500 }, { "epoch": 1.4463594566859783, "grad_norm": 0.06601426005363464, "learning_rate": 1.6384101358285058e-05, "loss": 0.0117, "step": 179000 }, { "epoch": 1.450399566900185, "grad_norm": 0.11237422376871109, "learning_rate": 1.6374001082749538e-05, "loss": 0.0115, "step": 179500 }, { "epoch": 1.4544396771143917, "grad_norm": 0.04262951388955116, "learning_rate": 1.636390080721402e-05, "loss": 0.0112, "step": 180000 }, { "epoch": 1.4584797873285984, "grad_norm": 0.13000500202178955, "learning_rate": 1.6353800531678504e-05, "loss": 0.0132, "step": 180500 }, { "epoch": 1.4625198975428049, "grad_norm": 0.0949823409318924, "learning_rate": 1.6343700256142988e-05, "loss": 0.0129, "step": 181000 }, { "epoch": 1.4665600077570116, "grad_norm": 0.04730290174484253, "learning_rate": 1.6333599980607474e-05, "loss": 0.0129, "step": 181500 }, { "epoch": 1.4706001179712183, "grad_norm": 0.050584714859724045, "learning_rate": 1.6323499705071958e-05, "loss": 0.013, "step": 182000 }, { "epoch": 1.474640228185425, "grad_norm": 0.1683996021747589, "learning_rate": 1.6313399429536438e-05, "loss": 0.0133, "step": 182500 }, { "epoch": 1.4786803383996316, "grad_norm": 0.1036485880613327, "learning_rate": 1.630329915400092e-05, "loss": 0.0136, "step": 183000 }, { "epoch": 1.4827204486138381, "grad_norm": 0.11697889119386673, "learning_rate": 1.6293198878465404e-05, "loss": 0.0133, "step": 183500 }, { "epoch": 1.4867605588280448, "grad_norm": 0.0688479095697403, "learning_rate": 1.628309860292989e-05, "loss": 0.0132, "step": 184000 }, { "epoch": 1.4908006690422515, "grad_norm": 0.12002038955688477, "learning_rate": 1.6272998327394375e-05, "loss": 0.0131, "step": 184500 }, { "epoch": 1.4948407792564582, "grad_norm": 0.08021160215139389, "learning_rate": 1.6262898051858855e-05, "loss": 0.0133, "step": 185000 }, { "epoch": 1.4988808894706647, "grad_norm": 0.07343757152557373, "learning_rate": 1.6252797776323338e-05, "loss": 0.0135, "step": 185500 }, { "epoch": 1.5029209996848714, "grad_norm": 0.058117810636758804, "learning_rate": 1.624269750078782e-05, "loss": 0.0143, "step": 186000 }, { "epoch": 1.506961109899078, "grad_norm": 0.10462002456188202, "learning_rate": 1.6232597225252308e-05, "loss": 0.0138, "step": 186500 }, { "epoch": 1.5110012201132847, "grad_norm": 0.07825891673564911, "learning_rate": 1.622249694971679e-05, "loss": 0.0141, "step": 187000 }, { "epoch": 1.5150413303274914, "grad_norm": 0.05809338763356209, "learning_rate": 1.621239667418127e-05, "loss": 0.0136, "step": 187500 }, { "epoch": 1.519081440541698, "grad_norm": 0.05035299435257912, "learning_rate": 1.6202296398645755e-05, "loss": 0.0165, "step": 188000 }, { "epoch": 1.5200429867726792, "eval_f1_macro": 0.9465345293457039, "eval_f1_micro": 0.9742553945189574, "eval_loss": 0.08490300178527832, "eval_precision_macro": 
0.9844654624463276, "eval_precision_micro": 0.9898738168824952, "eval_recall_macro": 0.9150699179615246, "eval_recall_micro": 0.9591221775256943, "eval_runtime": 13330.8537, "eval_samples_per_second": 2.971, "eval_steps_per_second": 0.012, "step": 188119 }, { "epoch": 1.5231215507559046, "grad_norm": 0.14376361668109894, "learning_rate": 1.6192196123110238e-05, "loss": 0.0157, "step": 188500 }, { "epoch": 1.5271616609701113, "grad_norm": 0.07897575944662094, "learning_rate": 1.6182095847574725e-05, "loss": 0.0159, "step": 189000 }, { "epoch": 1.531201771184318, "grad_norm": 0.06912536919116974, "learning_rate": 1.6171995572039208e-05, "loss": 0.0163, "step": 189500 }, { "epoch": 1.5352418813985245, "grad_norm": 0.05066482350230217, "learning_rate": 1.6161895296503688e-05, "loss": 0.0156, "step": 190000 }, { "epoch": 1.5392819916127312, "grad_norm": 0.14292369782924652, "learning_rate": 1.615179502096817e-05, "loss": 0.0198, "step": 190500 }, { "epoch": 1.5433221018269379, "grad_norm": 0.08798356354236603, "learning_rate": 1.614169474543266e-05, "loss": 0.0197, "step": 191000 }, { "epoch": 1.5473622120411445, "grad_norm": 0.061990030109882355, "learning_rate": 1.6131594469897142e-05, "loss": 0.0183, "step": 191500 }, { "epoch": 1.551402322255351, "grad_norm": 0.05433070659637451, "learning_rate": 1.6121494194361625e-05, "loss": 0.0183, "step": 192000 }, { "epoch": 1.5554424324695577, "grad_norm": 0.13680632412433624, "learning_rate": 1.6111393918826105e-05, "loss": 0.0192, "step": 192500 }, { "epoch": 1.5594825426837644, "grad_norm": 0.1941196620464325, "learning_rate": 1.610129364329059e-05, "loss": 0.0176, "step": 193000 }, { "epoch": 1.563522652897971, "grad_norm": 0.08578658103942871, "learning_rate": 1.6091193367755075e-05, "loss": 0.0173, "step": 193500 }, { "epoch": 1.5675627631121778, "grad_norm": 0.04361563175916672, "learning_rate": 1.608109309221956e-05, "loss": 0.0171, "step": 194000 }, { "epoch": 1.5716028733263843, "grad_norm": 0.12448256462812424, "learning_rate": 1.6070992816684042e-05, "loss": 0.0168, "step": 194500 }, { "epoch": 1.575642983540591, "grad_norm": 0.10221997648477554, "learning_rate": 1.6060892541148522e-05, "loss": 0.017, "step": 195000 }, { "epoch": 1.5796830937547977, "grad_norm": 0.07009778171777725, "learning_rate": 1.6050792265613005e-05, "loss": 0.0175, "step": 195500 }, { "epoch": 1.5837232039690043, "grad_norm": 0.06714298576116562, "learning_rate": 1.6040691990077492e-05, "loss": 0.0174, "step": 196000 }, { "epoch": 1.5877633141832108, "grad_norm": 0.12766534090042114, "learning_rate": 1.6030591714541975e-05, "loss": 0.0179, "step": 196500 }, { "epoch": 1.5918034243974175, "grad_norm": 0.10328399389982224, "learning_rate": 1.602049143900646e-05, "loss": 0.0175, "step": 197000 }, { "epoch": 1.5958435346116242, "grad_norm": 0.09311484545469284, "learning_rate": 1.601039116347094e-05, "loss": 0.0172, "step": 197500 }, { "epoch": 1.599883644825831, "grad_norm": 0.08157425373792648, "learning_rate": 1.6000290887935422e-05, "loss": 0.0171, "step": 198000 }, { "epoch": 1.600045249234399, "eval_f1_macro": 0.9462988853572672, "eval_f1_micro": 0.9743141624468545, "eval_loss": 0.09824506938457489, "eval_precision_macro": 0.9878799683485701, "eval_precision_micro": 0.9928597658940401, "eval_recall_macro": 0.9118503420886092, "eval_recall_micro": 0.9564486807740753, "eval_runtime": 13226.8299, "eval_samples_per_second": 2.994, "eval_steps_per_second": 0.012, "step": 198020 }, { "epoch": 1.6039237550400376, "grad_norm": 0.10958320647478104, "learning_rate": 
1.599019061239991e-05, "loss": 0.0167, "step": 198500 }, { "epoch": 1.607963865254244, "grad_norm": 0.07280286401510239, "learning_rate": 1.5980090336864392e-05, "loss": 0.0164, "step": 199000 }, { "epoch": 1.6120039754684508, "grad_norm": 0.0816897377371788, "learning_rate": 1.5969990061328876e-05, "loss": 0.017, "step": 199500 }, { "epoch": 1.6160440856826574, "grad_norm": 0.046233151108026505, "learning_rate": 1.595988978579336e-05, "loss": 0.0163, "step": 200000 }, { "epoch": 1.6200841958968641, "grad_norm": 0.13440461456775665, "learning_rate": 1.594978951025784e-05, "loss": 0.015, "step": 200500 }, { "epoch": 1.6241243061110706, "grad_norm": 0.0861237496137619, "learning_rate": 1.5939689234722326e-05, "loss": 0.0146, "step": 201000 }, { "epoch": 1.6281644163252773, "grad_norm": 0.06643826514482498, "learning_rate": 1.592958895918681e-05, "loss": 0.0142, "step": 201500 }, { "epoch": 1.632204526539484, "grad_norm": 0.06138383969664574, "learning_rate": 1.5919488683651292e-05, "loss": 0.0143, "step": 202000 }, { "epoch": 1.6362446367536907, "grad_norm": 0.13212205469608307, "learning_rate": 1.5909388408115776e-05, "loss": 0.0147, "step": 202500 }, { "epoch": 1.6402847469678974, "grad_norm": 0.07676049321889877, "learning_rate": 1.5899288132580256e-05, "loss": 0.0151, "step": 203000 }, { "epoch": 1.6443248571821039, "grad_norm": 0.10008609294891357, "learning_rate": 1.5889187857044743e-05, "loss": 0.0152, "step": 203500 }, { "epoch": 1.6483649673963106, "grad_norm": 0.04750071465969086, "learning_rate": 1.5879087581509226e-05, "loss": 0.0157, "step": 204000 }, { "epoch": 1.6524050776105172, "grad_norm": 0.11740187555551529, "learning_rate": 1.586898730597371e-05, "loss": 0.0155, "step": 204500 }, { "epoch": 1.656445187824724, "grad_norm": 0.06920389086008072, "learning_rate": 1.5858887030438193e-05, "loss": 0.0156, "step": 205000 }, { "epoch": 1.6604852980389304, "grad_norm": 0.05165468528866768, "learning_rate": 1.5848786754902673e-05, "loss": 0.0151, "step": 205500 }, { "epoch": 1.664525408253137, "grad_norm": 0.07880023121833801, "learning_rate": 1.583868647936716e-05, "loss": 0.0152, "step": 206000 }, { "epoch": 1.6685655184673438, "grad_norm": 0.11061804741621017, "learning_rate": 1.5828586203831643e-05, "loss": 0.015, "step": 206500 }, { "epoch": 1.6726056286815505, "grad_norm": 0.08423452824354172, "learning_rate": 1.5818485928296126e-05, "loss": 0.015, "step": 207000 }, { "epoch": 1.6766457388957572, "grad_norm": 0.07225336134433746, "learning_rate": 1.580838565276061e-05, "loss": 0.0148, "step": 207500 }, { "epoch": 1.680047511696119, "eval_f1_macro": 0.9464641577937303, "eval_f1_micro": 0.9740978113062491, "eval_loss": 0.08534455299377441, "eval_precision_macro": 0.9875234035936792, "eval_precision_micro": 0.9923821627163134, "eval_recall_macro": 0.9123021051137999, "eval_recall_micro": 0.9564750381751005, "eval_runtime": 12543.2073, "eval_samples_per_second": 3.157, "eval_steps_per_second": 0.012, "step": 207921 }, { "epoch": 1.6806858491099637, "grad_norm": 0.05870038643479347, "learning_rate": 1.579828537722509e-05, "loss": 0.0197, "step": 208000 }, { "epoch": 1.6847259593241704, "grad_norm": 0.11144687980413437, "learning_rate": 1.5788185101689576e-05, "loss": 0.0197, "step": 208500 }, { "epoch": 1.688766069538377, "grad_norm": 0.07475966960191727, "learning_rate": 1.577808482615406e-05, "loss": 0.0195, "step": 209000 }, { "epoch": 1.6928061797525837, "grad_norm": 0.05573410540819168, "learning_rate": 1.5767984550618543e-05, "loss": 0.0202, "step": 209500 }, { "epoch": 
1.6968462899667902, "grad_norm": 0.07953529059886932, "learning_rate": 1.5757884275083026e-05, "loss": 0.0191, "step": 210000 }, { "epoch": 1.700886400180997, "grad_norm": 0.08590356260538101, "learning_rate": 1.5747783999547506e-05, "loss": 0.0147, "step": 210500 }, { "epoch": 1.7049265103952036, "grad_norm": 0.08645664155483246, "learning_rate": 1.5737683724011993e-05, "loss": 0.0145, "step": 211000 }, { "epoch": 1.7089666206094103, "grad_norm": 0.059178948402404785, "learning_rate": 1.5727583448476476e-05, "loss": 0.0139, "step": 211500 }, { "epoch": 1.7130067308236168, "grad_norm": 0.05445469170808792, "learning_rate": 1.571748317294096e-05, "loss": 0.0143, "step": 212000 }, { "epoch": 1.7170468410378235, "grad_norm": 0.10709578543901443, "learning_rate": 1.5707382897405443e-05, "loss": 0.0141, "step": 212500 }, { "epoch": 1.7210869512520302, "grad_norm": 0.0663144662976265, "learning_rate": 1.5697282621869923e-05, "loss": 0.0121, "step": 213000 }, { "epoch": 1.7251270614662368, "grad_norm": 0.0667869821190834, "learning_rate": 1.568718234633441e-05, "loss": 0.0125, "step": 213500 }, { "epoch": 1.7291671716804435, "grad_norm": 0.09561540186405182, "learning_rate": 1.5677082070798893e-05, "loss": 0.0122, "step": 214000 }, { "epoch": 1.73320728189465, "grad_norm": 0.09017562866210938, "learning_rate": 1.5666981795263377e-05, "loss": 0.0128, "step": 214500 }, { "epoch": 1.7372473921088567, "grad_norm": 0.06796102970838547, "learning_rate": 1.565688151972786e-05, "loss": 0.0129, "step": 215000 }, { "epoch": 1.7412875023230634, "grad_norm": 0.06975946575403214, "learning_rate": 1.5646781244192343e-05, "loss": 0.0126, "step": 215500 }, { "epoch": 1.74532761253727, "grad_norm": 0.04627285152673721, "learning_rate": 1.5636680968656827e-05, "loss": 0.0126, "step": 216000 }, { "epoch": 1.7493677227514766, "grad_norm": 0.12213249504566193, "learning_rate": 1.562658069312131e-05, "loss": 0.0124, "step": 216500 }, { "epoch": 1.7534078329656833, "grad_norm": 0.0799461305141449, "learning_rate": 1.5616480417585793e-05, "loss": 0.0125, "step": 217000 }, { "epoch": 1.75744794317989, "grad_norm": 0.05975542962551117, "learning_rate": 1.5606380142050277e-05, "loss": 0.0124, "step": 217500 }, { "epoch": 1.760049774157839, "eval_f1_macro": 0.9468023383313312, "eval_f1_micro": 0.9742265323429071, "eval_loss": 0.08699483424425125, "eval_precision_macro": 0.9835541832835969, "eval_precision_micro": 0.9902263876070855, "eval_recall_macro": 0.915890529699618, "eval_recall_micro": 0.9587354997620732, "eval_runtime": 12459.0163, "eval_samples_per_second": 3.179, "eval_steps_per_second": 0.012, "step": 217822 } ], "logging_steps": 500, "max_steps": 990072, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 9901, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.468409508314638e+19, "train_batch_size": 256, "trial_name": null, "trial_params": null }
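Reading the state above: eval_f1_macro peaks at 0.9470943315331984 on step 168317, the five later evaluations (steps 178218, 188119, 198020, 207921, 217822) all score lower, the EarlyStoppingCallback patience counter reaches its limit of 5, and should_training_stop is set to true. What follows is a minimal, non-authoritative sketch for inspecting a trainer_state.json like this one; it assumes only the Hugging Face Trainer layout visible above (a log_history list whose evaluation entries carry "step", "eval_loss", and "eval_f1_macro") and a hypothetical local file name trainer_state.json.

# Sketch only: summarize the evaluation checkpoints recorded in a
# trainer_state.json of the shape shown above. The file name and the
# choice of macro-F1 as the ranking metric are assumptions, not part
# of the original run.
import json

def summarize_eval_history(path="trainer_state.json"):
    with open(path) as f:
        state = json.load(f)
    # Keep only the log entries that carry evaluation metrics.
    evals = [e for e in state["log_history"] if "eval_f1_macro" in e]
    for e in evals:
        print(f'step {e["step"]:>7}  eval_loss {e["eval_loss"]:.4f}  '
              f'eval_f1_macro {e["eval_f1_macro"]:.4f}')
    # Highest macro-F1 seen so far; matches the checkpoint the trainer keeps.
    best = max(evals, key=lambda e: e["eval_f1_macro"])
    print("best eval_f1_macro:", f'{best["eval_f1_macro"]:.4f}', "at step", best["step"])

if __name__ == "__main__":
    summarize_eval_history()

Run against this state, the summary would list one line per 9901-step evaluation and report step 168317 as the best macro-F1 checkpoint, consistent with the early-stopping decision recorded in the callback block.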