neurobiber / trainer_state.json
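Below is the raw Trainer state. As a convenience (not part of the original upload), here is a minimal Python sketch of one way to read it with the standard library; it assumes the JSON below has been saved locally as trainer_state.json, and uses only the key names that appear in the file (best_metric, best_model_checkpoint, log_history, loss, eval_loss, eval_f1_macro, step).

    import json

    # Load the Trainer state dumped by transformers during fine-tuning.
    with open("trainer_state.json") as f:
        state = json.load(f)

    # best_metric here equals the eval_f1_macro logged at the best checkpoint.
    print("best_metric:     ", state["best_metric"])
    print("best checkpoint: ", state["best_model_checkpoint"])

    # log_history mixes periodic training logs (have "loss") and
    # evaluation logs (have "eval_loss"); split them for inspection.
    train_logs = [e for e in state["log_history"] if "loss" in e]
    eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

    for e in eval_logs:
        print(f'step {e["step"]:>6}  eval_loss {e["eval_loss"]:.4f}  '
              f'eval_f1_macro {e["eval_f1_macro"]:.4f}')
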
{
"best_metric": 0.9470943315331984,
"best_model_checkpoint": "/shared/3/projects/hiatus/tagged_data/models/roberta-base/binary-finetune-full/results/checkpoint-168317",
"epoch": 1.760049774157839,
"eval_steps": 9901,
"global_step": 217822,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004040110214206643,
"grad_norm": 0.24907590448856354,
"learning_rate": 1.9989899724464485e-05,
"loss": 0.3886,
"step": 500
},
{
"epoch": 0.008080220428413287,
"grad_norm": 0.19640618562698364,
"learning_rate": 1.997979944892897e-05,
"loss": 0.2835,
"step": 1000
},
{
"epoch": 0.01212033064261993,
"grad_norm": 0.21297627687454224,
"learning_rate": 1.9969699173393452e-05,
"loss": 0.2123,
"step": 1500
},
{
"epoch": 0.016160440856826573,
"grad_norm": 0.1339714229106903,
"learning_rate": 1.9959598897857935e-05,
"loss": 0.1631,
"step": 2000
},
{
"epoch": 0.020200551071033218,
"grad_norm": 0.20592452585697174,
"learning_rate": 1.994949862232242e-05,
"loss": 0.1306,
"step": 2500
},
{
"epoch": 0.02424066128523986,
"grad_norm": 0.1734761744737625,
"learning_rate": 1.9939398346786902e-05,
"loss": 0.1134,
"step": 3000
},
{
"epoch": 0.028280771499446505,
"grad_norm": 0.1429387331008911,
"learning_rate": 1.9929298071251385e-05,
"loss": 0.0954,
"step": 3500
},
{
"epoch": 0.032320881713653146,
"grad_norm": 0.13530635833740234,
"learning_rate": 1.991919779571587e-05,
"loss": 0.0833,
"step": 4000
},
{
"epoch": 0.03636099192785979,
"grad_norm": 0.16425204277038574,
"learning_rate": 1.9909097520180352e-05,
"loss": 0.0739,
"step": 4500
},
{
"epoch": 0.040401102142066436,
"grad_norm": 0.15035004913806915,
"learning_rate": 1.9898997244644835e-05,
"loss": 0.0657,
"step": 5000
},
{
"epoch": 0.04444121235627308,
"grad_norm": 0.1048048660159111,
"learning_rate": 1.988889696910932e-05,
"loss": 0.0606,
"step": 5500
},
{
"epoch": 0.04848132257047972,
"grad_norm": 0.1167823076248169,
"learning_rate": 1.9878796693573802e-05,
"loss": 0.0555,
"step": 6000
},
{
"epoch": 0.052521432784686364,
"grad_norm": 0.17904673516750336,
"learning_rate": 1.9868696418038285e-05,
"loss": 0.0517,
"step": 6500
},
{
"epoch": 0.05656154299889301,
"grad_norm": 0.22022856771945953,
"learning_rate": 1.985859614250277e-05,
"loss": 0.0483,
"step": 7000
},
{
"epoch": 0.060601653213099654,
"grad_norm": 0.11566773056983948,
"learning_rate": 1.9848495866967252e-05,
"loss": 0.0445,
"step": 7500
},
{
"epoch": 0.06464176342730629,
"grad_norm": 0.09606140851974487,
"learning_rate": 1.9838395591431736e-05,
"loss": 0.0434,
"step": 8000
},
{
"epoch": 0.06868187364151294,
"grad_norm": 0.14014209806919098,
"learning_rate": 1.982829531589622e-05,
"loss": 0.0415,
"step": 8500
},
{
"epoch": 0.07272198385571958,
"grad_norm": 0.1238802894949913,
"learning_rate": 1.9818195040360702e-05,
"loss": 0.0401,
"step": 9000
},
{
"epoch": 0.07676209406992623,
"grad_norm": 0.27284470200538635,
"learning_rate": 1.9808094764825186e-05,
"loss": 0.0375,
"step": 9500
},
{
"epoch": 0.08000226246171996,
"eval_f1_macro": 0.8895664094153212,
"eval_f1_micro": 0.9635283563255777,
"eval_loss": 0.11441826820373535,
"eval_precision_macro": 0.924616270515263,
"eval_precision_micro": 0.9673416146866455,
"eval_recall_macro": 0.8766278018110966,
"eval_recall_micro": 0.9597450436296798,
"eval_runtime": 13336.1892,
"eval_samples_per_second": 2.97,
"eval_steps_per_second": 0.012,
"step": 9901
},
{
"epoch": 0.08080220428413287,
"grad_norm": 0.07111234217882156,
"learning_rate": 1.979799448928967e-05,
"loss": 0.037,
"step": 10000
},
{
"epoch": 0.08484231449833951,
"grad_norm": 0.11708366125822067,
"learning_rate": 1.9787894213754152e-05,
"loss": 0.0337,
"step": 10500
},
{
"epoch": 0.08888242471254616,
"grad_norm": 0.12418048083782196,
"learning_rate": 1.9777793938218636e-05,
"loss": 0.0331,
"step": 11000
},
{
"epoch": 0.0929225349267528,
"grad_norm": 0.09599123895168304,
"learning_rate": 1.976769366268312e-05,
"loss": 0.032,
"step": 11500
},
{
"epoch": 0.09696264514095944,
"grad_norm": 0.08350682258605957,
"learning_rate": 1.9757593387147602e-05,
"loss": 0.03,
"step": 12000
},
{
"epoch": 0.10100275535516609,
"grad_norm": 0.1321333944797516,
"learning_rate": 1.9747493111612086e-05,
"loss": 0.03,
"step": 12500
},
{
"epoch": 0.10504286556937273,
"grad_norm": 0.09571998566389084,
"learning_rate": 1.973739283607657e-05,
"loss": 0.0281,
"step": 13000
},
{
"epoch": 0.10908297578357938,
"grad_norm": 0.11053865402936935,
"learning_rate": 1.9727292560541053e-05,
"loss": 0.0277,
"step": 13500
},
{
"epoch": 0.11312308599778602,
"grad_norm": 0.06845366209745407,
"learning_rate": 1.9717192285005536e-05,
"loss": 0.0274,
"step": 14000
},
{
"epoch": 0.11716319621199266,
"grad_norm": 0.11518129706382751,
"learning_rate": 1.970709200947002e-05,
"loss": 0.0267,
"step": 14500
},
{
"epoch": 0.12120330642619931,
"grad_norm": 0.09419895708560944,
"learning_rate": 1.9696991733934503e-05,
"loss": 0.0259,
"step": 15000
},
{
"epoch": 0.12524341664040595,
"grad_norm": 0.11040966212749481,
"learning_rate": 1.9686891458398986e-05,
"loss": 0.0258,
"step": 15500
},
{
"epoch": 0.12928352685461258,
"grad_norm": 0.08434844762086868,
"learning_rate": 1.967679118286347e-05,
"loss": 0.0253,
"step": 16000
},
{
"epoch": 0.13332363706881925,
"grad_norm": 0.13895830512046814,
"learning_rate": 1.9666690907327953e-05,
"loss": 0.0247,
"step": 16500
},
{
"epoch": 0.1373637472830259,
"grad_norm": 0.10540761798620224,
"learning_rate": 1.9656590631792436e-05,
"loss": 0.0245,
"step": 17000
},
{
"epoch": 0.14140385749723253,
"grad_norm": 0.09767123311758041,
"learning_rate": 1.964649035625692e-05,
"loss": 0.0245,
"step": 17500
},
{
"epoch": 0.14544396771143916,
"grad_norm": 0.08917172998189926,
"learning_rate": 1.9636390080721403e-05,
"loss": 0.0251,
"step": 18000
},
{
"epoch": 0.1494840779256458,
"grad_norm": 0.13313362002372742,
"learning_rate": 1.9626289805185886e-05,
"loss": 0.0252,
"step": 18500
},
{
"epoch": 0.15352418813985247,
"grad_norm": 0.12236423790454865,
"learning_rate": 1.961618952965037e-05,
"loss": 0.0251,
"step": 19000
},
{
"epoch": 0.1575642983540591,
"grad_norm": 0.09876661747694016,
"learning_rate": 1.9606089254114853e-05,
"loss": 0.0249,
"step": 19500
},
{
"epoch": 0.16000452492343992,
"eval_f1_macro": 0.9035923074831559,
"eval_f1_micro": 0.9606367859628728,
"eval_loss": 0.11549682915210724,
"eval_precision_macro": 0.9036435627053582,
"eval_precision_micro": 0.9530018135782053,
"eval_recall_macro": 0.9133421606323336,
"eval_recall_micro": 0.9683950814868224,
"eval_runtime": 15280.9037,
"eval_samples_per_second": 2.592,
"eval_steps_per_second": 0.01,
"step": 19802
},
{
"epoch": 0.16160440856826574,
"grad_norm": 0.057935502380132675,
"learning_rate": 1.9595988978579336e-05,
"loss": 0.0246,
"step": 20000
},
{
"epoch": 0.16564451878247238,
"grad_norm": 0.13269482553005219,
"learning_rate": 1.958588870304382e-05,
"loss": 0.0227,
"step": 20500
},
{
"epoch": 0.16968462899667902,
"grad_norm": 0.09697619825601578,
"learning_rate": 1.9575788427508303e-05,
"loss": 0.0229,
"step": 21000
},
{
"epoch": 0.17372473921088566,
"grad_norm": 0.2296031266450882,
"learning_rate": 1.9565688151972786e-05,
"loss": 0.0222,
"step": 21500
},
{
"epoch": 0.17776484942509233,
"grad_norm": 0.07054860144853592,
"learning_rate": 1.955558787643727e-05,
"loss": 0.0217,
"step": 22000
},
{
"epoch": 0.18180495963929896,
"grad_norm": 0.14594444632530212,
"learning_rate": 1.9545487600901753e-05,
"loss": 0.0216,
"step": 22500
},
{
"epoch": 0.1858450698535056,
"grad_norm": 0.1951671838760376,
"learning_rate": 1.9535387325366237e-05,
"loss": 0.0236,
"step": 23000
},
{
"epoch": 0.18988518006771224,
"grad_norm": 0.09014302492141724,
"learning_rate": 1.952528704983072e-05,
"loss": 0.0241,
"step": 23500
},
{
"epoch": 0.19392529028191888,
"grad_norm": 0.07351396232843399,
"learning_rate": 1.9515186774295207e-05,
"loss": 0.0246,
"step": 24000
},
{
"epoch": 0.19796540049612554,
"grad_norm": 0.13433478772640228,
"learning_rate": 1.950508649875969e-05,
"loss": 0.0237,
"step": 24500
},
{
"epoch": 0.20200551071033218,
"grad_norm": 0.09406758099794388,
"learning_rate": 1.949498622322417e-05,
"loss": 0.0246,
"step": 25000
},
{
"epoch": 0.20604562092453882,
"grad_norm": 0.0723891332745552,
"learning_rate": 1.9484885947688653e-05,
"loss": 0.0248,
"step": 25500
},
{
"epoch": 0.21008573113874546,
"grad_norm": 0.0635887160897255,
"learning_rate": 1.9474785672153137e-05,
"loss": 0.0243,
"step": 26000
},
{
"epoch": 0.2141258413529521,
"grad_norm": 0.12226919084787369,
"learning_rate": 1.9464685396617624e-05,
"loss": 0.0246,
"step": 26500
},
{
"epoch": 0.21816595156715876,
"grad_norm": 0.17104922235012054,
"learning_rate": 1.9454585121082107e-05,
"loss": 0.0244,
"step": 27000
},
{
"epoch": 0.2222060617813654,
"grad_norm": 0.07366731762886047,
"learning_rate": 1.9444484845546587e-05,
"loss": 0.0236,
"step": 27500
},
{
"epoch": 0.22624617199557204,
"grad_norm": 0.05332706496119499,
"learning_rate": 1.943438457001107e-05,
"loss": 0.0191,
"step": 28000
},
{
"epoch": 0.23028628220977868,
"grad_norm": 0.12188129127025604,
"learning_rate": 1.9424284294475554e-05,
"loss": 0.0184,
"step": 28500
},
{
"epoch": 0.2343263924239853,
"grad_norm": 0.07722073048353195,
"learning_rate": 1.941418401894004e-05,
"loss": 0.018,
"step": 29000
},
{
"epoch": 0.23836650263819198,
"grad_norm": 0.07803859561681747,
"learning_rate": 1.9404083743404524e-05,
"loss": 0.0176,
"step": 29500
},
{
"epoch": 0.24000678738515988,
"eval_f1_macro": 0.9189758616039659,
"eval_f1_micro": 0.9664452050510047,
"eval_loss": 0.10743161290884018,
"eval_precision_macro": 0.9454764579982075,
"eval_precision_micro": 0.9680139971687907,
"eval_recall_macro": 0.9051902291376027,
"eval_recall_micro": 0.9648814895676969,
"eval_runtime": 13586.8138,
"eval_samples_per_second": 2.915,
"eval_steps_per_second": 0.011,
"step": 29703
},
{
"epoch": 0.24240661285239862,
"grad_norm": 0.04772321879863739,
"learning_rate": 1.9393983467869004e-05,
"loss": 0.0177,
"step": 30000
},
{
"epoch": 0.24644672306660526,
"grad_norm": 0.11928682029247284,
"learning_rate": 1.9383883192333487e-05,
"loss": 0.0229,
"step": 30500
},
{
"epoch": 0.2504868332808119,
"grad_norm": 0.08102133870124817,
"learning_rate": 1.937378291679797e-05,
"loss": 0.0228,
"step": 31000
},
{
"epoch": 0.25452694349501853,
"grad_norm": 0.073371522128582,
"learning_rate": 1.9363682641262457e-05,
"loss": 0.0231,
"step": 31500
},
{
"epoch": 0.25856705370922517,
"grad_norm": 0.08793163299560547,
"learning_rate": 1.935358236572694e-05,
"loss": 0.0229,
"step": 32000
},
{
"epoch": 0.2626071639234318,
"grad_norm": 0.12065927684307098,
"learning_rate": 1.934348209019142e-05,
"loss": 0.0231,
"step": 32500
},
{
"epoch": 0.2666472741376385,
"grad_norm": 0.10052382946014404,
"learning_rate": 1.9333381814655904e-05,
"loss": 0.0218,
"step": 33000
},
{
"epoch": 0.27068738435184514,
"grad_norm": 0.08048272132873535,
"learning_rate": 1.9323281539120387e-05,
"loss": 0.0227,
"step": 33500
},
{
"epoch": 0.2747274945660518,
"grad_norm": 0.05087564140558243,
"learning_rate": 1.9313181263584874e-05,
"loss": 0.0206,
"step": 34000
},
{
"epoch": 0.2787676047802584,
"grad_norm": 0.13495181500911713,
"learning_rate": 1.9303080988049357e-05,
"loss": 0.0213,
"step": 34500
},
{
"epoch": 0.28280771499446505,
"grad_norm": 0.104469895362854,
"learning_rate": 1.9292980712513837e-05,
"loss": 0.0215,
"step": 35000
},
{
"epoch": 0.2868478252086717,
"grad_norm": 0.10830747336149216,
"learning_rate": 1.928288043697832e-05,
"loss": 0.0232,
"step": 35500
},
{
"epoch": 0.29088793542287833,
"grad_norm": 0.07727912068367004,
"learning_rate": 1.9272780161442804e-05,
"loss": 0.0239,
"step": 36000
},
{
"epoch": 0.29492804563708497,
"grad_norm": 0.11781858652830124,
"learning_rate": 1.926267988590729e-05,
"loss": 0.0229,
"step": 36500
},
{
"epoch": 0.2989681558512916,
"grad_norm": 0.09465543925762177,
"learning_rate": 1.9252579610371774e-05,
"loss": 0.0241,
"step": 37000
},
{
"epoch": 0.30300826606549824,
"grad_norm": 0.08024444431066513,
"learning_rate": 1.9242479334836254e-05,
"loss": 0.0236,
"step": 37500
},
{
"epoch": 0.30704837627970494,
"grad_norm": 0.06720072776079178,
"learning_rate": 1.9232379059300738e-05,
"loss": 0.0211,
"step": 38000
},
{
"epoch": 0.3110884864939116,
"grad_norm": 0.13362745940685272,
"learning_rate": 1.922227878376522e-05,
"loss": 0.0203,
"step": 38500
},
{
"epoch": 0.3151285967081182,
"grad_norm": 0.09114370495080948,
"learning_rate": 1.9212178508229708e-05,
"loss": 0.0203,
"step": 39000
},
{
"epoch": 0.31916870692232485,
"grad_norm": 0.07381443679332733,
"learning_rate": 1.920207823269419e-05,
"loss": 0.0206,
"step": 39500
},
{
"epoch": 0.32000904984687983,
"eval_f1_macro": 0.9235874471417507,
"eval_f1_micro": 0.9663972411435532,
"eval_loss": 0.10308045893907547,
"eval_precision_macro": 0.9426979961917242,
"eval_precision_micro": 0.9680776634557233,
"eval_recall_macro": 0.9108391105037915,
"eval_recall_micro": 0.9647226425913261,
"eval_runtime": 13873.9388,
"eval_samples_per_second": 2.854,
"eval_steps_per_second": 0.011,
"step": 39604
},
{
"epoch": 0.3232088171365315,
"grad_norm": 0.07686352729797363,
"learning_rate": 1.9191977957158674e-05,
"loss": 0.0204,
"step": 40000
},
{
"epoch": 0.32724892735073813,
"grad_norm": 0.15459179878234863,
"learning_rate": 1.9181877681623154e-05,
"loss": 0.0225,
"step": 40500
},
{
"epoch": 0.33128903756494477,
"grad_norm": 0.11474985629320145,
"learning_rate": 1.9171777406087638e-05,
"loss": 0.0229,
"step": 41000
},
{
"epoch": 0.3353291477791514,
"grad_norm": 0.09817365556955338,
"learning_rate": 1.9161677130552125e-05,
"loss": 0.0229,
"step": 41500
},
{
"epoch": 0.33936925799335804,
"grad_norm": 0.07288320362567902,
"learning_rate": 1.9151576855016608e-05,
"loss": 0.0222,
"step": 42000
},
{
"epoch": 0.3434093682075647,
"grad_norm": 0.13258545100688934,
"learning_rate": 1.914147657948109e-05,
"loss": 0.0221,
"step": 42500
},
{
"epoch": 0.3474494784217713,
"grad_norm": 0.09609493613243103,
"learning_rate": 1.913137630394557e-05,
"loss": 0.0224,
"step": 43000
},
{
"epoch": 0.351489588635978,
"grad_norm": 0.0800900012254715,
"learning_rate": 1.9121276028410055e-05,
"loss": 0.022,
"step": 43500
},
{
"epoch": 0.35552969885018465,
"grad_norm": 0.06640051305294037,
"learning_rate": 1.911117575287454e-05,
"loss": 0.0209,
"step": 44000
},
{
"epoch": 0.3595698090643913,
"grad_norm": 0.13987226784229279,
"learning_rate": 1.9101075477339025e-05,
"loss": 0.022,
"step": 44500
},
{
"epoch": 0.3636099192785979,
"grad_norm": 0.08626226335763931,
"learning_rate": 1.9090975201803508e-05,
"loss": 0.0219,
"step": 45000
},
{
"epoch": 0.36765002949280456,
"grad_norm": 0.09093815088272095,
"learning_rate": 1.9080874926267988e-05,
"loss": 0.0191,
"step": 45500
},
{
"epoch": 0.3716901397070112,
"grad_norm": 0.062450163066387177,
"learning_rate": 1.907077465073247e-05,
"loss": 0.0192,
"step": 46000
},
{
"epoch": 0.37573024992121784,
"grad_norm": 0.14638446271419525,
"learning_rate": 1.9060674375196958e-05,
"loss": 0.0186,
"step": 46500
},
{
"epoch": 0.3797703601354245,
"grad_norm": 0.08730041235685349,
"learning_rate": 1.905057409966144e-05,
"loss": 0.019,
"step": 47000
},
{
"epoch": 0.3838104703496311,
"grad_norm": 0.09185372292995453,
"learning_rate": 1.9040473824125925e-05,
"loss": 0.0189,
"step": 47500
},
{
"epoch": 0.38785058056383775,
"grad_norm": 0.05995471775531769,
"learning_rate": 1.9030373548590405e-05,
"loss": 0.0174,
"step": 48000
},
{
"epoch": 0.39189069077804445,
"grad_norm": 0.14513157308101654,
"learning_rate": 1.902027327305489e-05,
"loss": 0.0167,
"step": 48500
},
{
"epoch": 0.3959308009922511,
"grad_norm": 0.08400790393352509,
"learning_rate": 1.9010172997519375e-05,
"loss": 0.0165,
"step": 49000
},
{
"epoch": 0.3999709112064577,
"grad_norm": 0.0705028846859932,
"learning_rate": 1.900007272198386e-05,
"loss": 0.0165,
"step": 49500
},
{
"epoch": 0.40001131230859976,
"eval_f1_macro": 0.9281014680466918,
"eval_f1_micro": 0.9684054719516875,
"eval_loss": 0.10883225500583649,
"eval_precision_macro": 0.9609447042869569,
"eval_precision_micro": 0.97977646274136,
"eval_recall_macro": 0.9023603199352568,
"eval_recall_micro": 0.9572953897303177,
"eval_runtime": 14645.8089,
"eval_samples_per_second": 2.704,
"eval_steps_per_second": 0.011,
"step": 49505
},
{
"epoch": 0.40401102142066436,
"grad_norm": 0.06206486374139786,
"learning_rate": 1.8989972446448342e-05,
"loss": 0.0163,
"step": 50000
},
{
"epoch": 0.408051131634871,
"grad_norm": 0.13632065057754517,
"learning_rate": 1.8979872170912822e-05,
"loss": 0.0175,
"step": 50500
},
{
"epoch": 0.41209124184907764,
"grad_norm": 0.10581111907958984,
"learning_rate": 1.896977189537731e-05,
"loss": 0.0179,
"step": 51000
},
{
"epoch": 0.4161313520632843,
"grad_norm": 0.05609723553061485,
"learning_rate": 1.8959671619841792e-05,
"loss": 0.0171,
"step": 51500
},
{
"epoch": 0.4201714622774909,
"grad_norm": 0.0569671131670475,
"learning_rate": 1.8949571344306275e-05,
"loss": 0.0177,
"step": 52000
},
{
"epoch": 0.42421157249169755,
"grad_norm": 0.12548725306987762,
"learning_rate": 1.893947106877076e-05,
"loss": 0.0179,
"step": 52500
},
{
"epoch": 0.4282516827059042,
"grad_norm": 0.14123043417930603,
"learning_rate": 1.892937079323524e-05,
"loss": 0.0195,
"step": 53000
},
{
"epoch": 0.4322917929201109,
"grad_norm": 0.07868105173110962,
"learning_rate": 1.8919270517699725e-05,
"loss": 0.0195,
"step": 53500
},
{
"epoch": 0.4363319031343175,
"grad_norm": 0.0551162026822567,
"learning_rate": 1.890917024216421e-05,
"loss": 0.0194,
"step": 54000
},
{
"epoch": 0.44037201334852416,
"grad_norm": 0.12377525120973587,
"learning_rate": 1.8899069966628692e-05,
"loss": 0.0193,
"step": 54500
},
{
"epoch": 0.4444121235627308,
"grad_norm": 0.07947281748056412,
"learning_rate": 1.8888969691093175e-05,
"loss": 0.0195,
"step": 55000
},
{
"epoch": 0.44845223377693744,
"grad_norm": 0.07180605828762054,
"learning_rate": 1.887886941555766e-05,
"loss": 0.0186,
"step": 55500
},
{
"epoch": 0.4524923439911441,
"grad_norm": 0.0590415820479393,
"learning_rate": 1.8868769140022142e-05,
"loss": 0.0186,
"step": 56000
},
{
"epoch": 0.4565324542053507,
"grad_norm": 0.12405771017074585,
"learning_rate": 1.8858668864486626e-05,
"loss": 0.0181,
"step": 56500
},
{
"epoch": 0.46057256441955735,
"grad_norm": 0.09074413031339645,
"learning_rate": 1.884856858895111e-05,
"loss": 0.0178,
"step": 57000
},
{
"epoch": 0.464612674633764,
"grad_norm": 0.12590628862380981,
"learning_rate": 1.8838468313415592e-05,
"loss": 0.0178,
"step": 57500
},
{
"epoch": 0.4686527848479706,
"grad_norm": 0.047191109508275986,
"learning_rate": 1.8828368037880076e-05,
"loss": 0.0154,
"step": 58000
},
{
"epoch": 0.4726928950621773,
"grad_norm": 0.13741852343082428,
"learning_rate": 1.881826776234456e-05,
"loss": 0.0156,
"step": 58500
},
{
"epoch": 0.47673300527638396,
"grad_norm": 0.07473180443048477,
"learning_rate": 1.8808167486809042e-05,
"loss": 0.0152,
"step": 59000
},
{
"epoch": 0.48001357477031975,
"eval_f1_macro": 0.9238212336154783,
"eval_f1_micro": 0.9658474370811376,
"eval_loss": 0.10412032902240753,
"eval_precision_macro": 0.9360090910956397,
"eval_precision_micro": 0.9629420110715917,
"eval_recall_macro": 0.9167736650129396,
"eval_recall_micro": 0.9687704488794511,
"eval_runtime": 13864.7476,
"eval_samples_per_second": 2.856,
"eval_steps_per_second": 0.011,
"step": 59406
},
{
"epoch": 0.4807731154905906,
"grad_norm": 0.06410785764455795,
"learning_rate": 1.8798067211273526e-05,
"loss": 0.0147,
"step": 59500
},
{
"epoch": 0.48481322570479723,
"grad_norm": 0.05010313540697098,
"learning_rate": 1.878796693573801e-05,
"loss": 0.0156,
"step": 60000
},
{
"epoch": 0.4888533359190039,
"grad_norm": 0.14338257908821106,
"learning_rate": 1.8777866660202493e-05,
"loss": 0.0164,
"step": 60500
},
{
"epoch": 0.4928934461332105,
"grad_norm": 0.09123385697603226,
"learning_rate": 1.8767766384666976e-05,
"loss": 0.0174,
"step": 61000
},
{
"epoch": 0.49693355634741715,
"grad_norm": 0.07728511840105057,
"learning_rate": 1.875766610913146e-05,
"loss": 0.017,
"step": 61500
},
{
"epoch": 0.5009736665616238,
"grad_norm": 0.06151897832751274,
"learning_rate": 1.8747565833595943e-05,
"loss": 0.0172,
"step": 62000
},
{
"epoch": 0.5050137767758305,
"grad_norm": 0.14278863370418549,
"learning_rate": 1.8737465558060426e-05,
"loss": 0.0166,
"step": 62500
},
{
"epoch": 0.5090538869900371,
"grad_norm": 0.08395873010158539,
"learning_rate": 1.872736528252491e-05,
"loss": 0.0206,
"step": 63000
},
{
"epoch": 0.5130939972042438,
"grad_norm": 0.09704262018203735,
"learning_rate": 1.8717265006989393e-05,
"loss": 0.0208,
"step": 63500
},
{
"epoch": 0.5171341074184503,
"grad_norm": 0.06397638469934464,
"learning_rate": 1.8707164731453876e-05,
"loss": 0.0208,
"step": 64000
},
{
"epoch": 0.521174217632657,
"grad_norm": 0.1525479257106781,
"learning_rate": 1.869706445591836e-05,
"loss": 0.0207,
"step": 64500
},
{
"epoch": 0.5252143278468636,
"grad_norm": 0.0878639966249466,
"learning_rate": 1.8686964180382843e-05,
"loss": 0.0198,
"step": 65000
},
{
"epoch": 0.5292544380610703,
"grad_norm": 0.05913593992590904,
"learning_rate": 1.8676863904847326e-05,
"loss": 0.0166,
"step": 65500
},
{
"epoch": 0.533294548275277,
"grad_norm": 0.05049494653940201,
"learning_rate": 1.866676362931181e-05,
"loss": 0.0166,
"step": 66000
},
{
"epoch": 0.5373346584894836,
"grad_norm": 0.10428164154291153,
"learning_rate": 1.8656663353776293e-05,
"loss": 0.0174,
"step": 66500
},
{
"epoch": 0.5413747687036903,
"grad_norm": 0.08380962908267975,
"learning_rate": 1.8646563078240776e-05,
"loss": 0.0165,
"step": 67000
},
{
"epoch": 0.5454148789178969,
"grad_norm": 0.12970462441444397,
"learning_rate": 1.863646280270526e-05,
"loss": 0.0164,
"step": 67500
},
{
"epoch": 0.5494549891321036,
"grad_norm": 0.12594661116600037,
"learning_rate": 1.8626362527169743e-05,
"loss": 0.0189,
"step": 68000
},
{
"epoch": 0.5534950993463101,
"grad_norm": 0.11368534713983536,
"learning_rate": 1.8616262251634226e-05,
"loss": 0.019,
"step": 68500
},
{
"epoch": 0.5575352095605168,
"grad_norm": 0.08376836031675339,
"learning_rate": 1.860616197609871e-05,
"loss": 0.0191,
"step": 69000
},
{
"epoch": 0.5600158372320397,
"eval_f1_macro": 0.9256130258855139,
"eval_f1_micro": 0.9668266025133863,
"eval_loss": 0.10856343805789948,
"eval_precision_macro": 0.9437511235105461,
"eval_precision_micro": 0.9695209969914294,
"eval_recall_macro": 0.9124750251974688,
"eval_recall_micro": 0.9641471425069983,
"eval_runtime": 14189.8384,
"eval_samples_per_second": 2.791,
"eval_steps_per_second": 0.011,
"step": 69307
},
{
"epoch": 0.5615753197747234,
"grad_norm": 0.08000296354293823,
"learning_rate": 1.8596061700563193e-05,
"loss": 0.0188,
"step": 69500
},
{
"epoch": 0.5656154299889301,
"grad_norm": 0.06347772479057312,
"learning_rate": 1.8585961425027677e-05,
"loss": 0.0187,
"step": 70000
},
{
"epoch": 0.5696555402031367,
"grad_norm": 0.21189579367637634,
"learning_rate": 1.857586114949216e-05,
"loss": 0.0196,
"step": 70500
},
{
"epoch": 0.5736956504173434,
"grad_norm": 0.07940568774938583,
"learning_rate": 1.8565760873956643e-05,
"loss": 0.0196,
"step": 71000
},
{
"epoch": 0.5777357606315501,
"grad_norm": 0.07458707690238953,
"learning_rate": 1.8555660598421127e-05,
"loss": 0.0199,
"step": 71500
},
{
"epoch": 0.5817758708457567,
"grad_norm": 0.0705709308385849,
"learning_rate": 1.854556032288561e-05,
"loss": 0.0193,
"step": 72000
},
{
"epoch": 0.5858159810599634,
"grad_norm": 0.13246993720531464,
"learning_rate": 1.8535460047350093e-05,
"loss": 0.0195,
"step": 72500
},
{
"epoch": 0.5898560912741699,
"grad_norm": 0.08721259236335754,
"learning_rate": 1.8525359771814577e-05,
"loss": 0.0196,
"step": 73000
},
{
"epoch": 0.5938962014883766,
"grad_norm": 0.07570379972457886,
"learning_rate": 1.851525949627906e-05,
"loss": 0.0186,
"step": 73500
},
{
"epoch": 0.5979363117025832,
"grad_norm": 0.07477313280105591,
"learning_rate": 1.8505159220743543e-05,
"loss": 0.0183,
"step": 74000
},
{
"epoch": 0.6019764219167899,
"grad_norm": 0.15558893978595734,
"learning_rate": 1.8495058945208027e-05,
"loss": 0.0194,
"step": 74500
},
{
"epoch": 0.6060165321309965,
"grad_norm": 0.08373390883207321,
"learning_rate": 1.848495866967251e-05,
"loss": 0.0189,
"step": 75000
},
{
"epoch": 0.6100566423452032,
"grad_norm": 0.06340883672237396,
"learning_rate": 1.8474858394136994e-05,
"loss": 0.0139,
"step": 75500
},
{
"epoch": 0.6140967525594099,
"grad_norm": 0.05438007041811943,
"learning_rate": 1.8464758118601477e-05,
"loss": 0.0148,
"step": 76000
},
{
"epoch": 0.6181368627736165,
"grad_norm": 0.1218661442399025,
"learning_rate": 1.845465784306596e-05,
"loss": 0.0151,
"step": 76500
},
{
"epoch": 0.6221769729878232,
"grad_norm": 0.0688873752951622,
"learning_rate": 1.8444557567530444e-05,
"loss": 0.0143,
"step": 77000
},
{
"epoch": 0.6262170832020297,
"grad_norm": 0.058265481144189835,
"learning_rate": 1.8434457291994927e-05,
"loss": 0.0142,
"step": 77500
},
{
"epoch": 0.6302571934162364,
"grad_norm": 0.046319037675857544,
"learning_rate": 1.842435701645941e-05,
"loss": 0.0142,
"step": 78000
},
{
"epoch": 0.634297303630443,
"grad_norm": 0.14100997149944305,
"learning_rate": 1.8414256740923894e-05,
"loss": 0.0146,
"step": 78500
},
{
"epoch": 0.6383374138446497,
"grad_norm": 0.10154972225427628,
"learning_rate": 1.8404156465388377e-05,
"loss": 0.0146,
"step": 79000
},
{
"epoch": 0.6400180996937597,
"eval_f1_macro": 0.9268223044157705,
"eval_f1_micro": 0.9675749211491975,
"eval_loss": 0.1128077358007431,
"eval_precision_macro": 0.9500253922065015,
"eval_precision_micro": 0.9728413004763068,
"eval_recall_macro": 0.9094651623357265,
"eval_recall_micro": 0.962365252860739,
"eval_runtime": 13980.4006,
"eval_samples_per_second": 2.833,
"eval_steps_per_second": 0.011,
"step": 79208
},
{
"epoch": 0.6423775240588563,
"grad_norm": 0.1044822484254837,
"learning_rate": 1.839405618985286e-05,
"loss": 0.0142,
"step": 79500
},
{
"epoch": 0.646417634273063,
"grad_norm": 0.061170101165771484,
"learning_rate": 1.8383955914317344e-05,
"loss": 0.0142,
"step": 80000
},
{
"epoch": 0.6504577444872696,
"grad_norm": 0.09872958064079285,
"learning_rate": 1.8373855638781827e-05,
"loss": 0.0174,
"step": 80500
},
{
"epoch": 0.6544978547014763,
"grad_norm": 0.08190814405679703,
"learning_rate": 1.836375536324631e-05,
"loss": 0.016,
"step": 81000
},
{
"epoch": 0.658537964915683,
"grad_norm": 0.07712013274431229,
"learning_rate": 1.8353655087710794e-05,
"loss": 0.0172,
"step": 81500
},
{
"epoch": 0.6625780751298895,
"grad_norm": 0.04823287948966026,
"learning_rate": 1.8343554812175277e-05,
"loss": 0.0168,
"step": 82000
},
{
"epoch": 0.6666181853440962,
"grad_norm": 0.11726228892803192,
"learning_rate": 1.833345453663976e-05,
"loss": 0.017,
"step": 82500
},
{
"epoch": 0.6706582955583028,
"grad_norm": 0.06535898894071579,
"learning_rate": 1.8323354261104244e-05,
"loss": 0.016,
"step": 83000
},
{
"epoch": 0.6746984057725095,
"grad_norm": 0.05892045795917511,
"learning_rate": 1.8313253985568727e-05,
"loss": 0.0159,
"step": 83500
},
{
"epoch": 0.6787385159867161,
"grad_norm": 0.04444234445691109,
"learning_rate": 1.830315371003321e-05,
"loss": 0.0153,
"step": 84000
},
{
"epoch": 0.6827786262009228,
"grad_norm": 0.1465209275484085,
"learning_rate": 1.8293053434497694e-05,
"loss": 0.0156,
"step": 84500
},
{
"epoch": 0.6868187364151294,
"grad_norm": 0.11835352331399918,
"learning_rate": 1.8282953158962178e-05,
"loss": 0.0154,
"step": 85000
},
{
"epoch": 0.690858846629336,
"grad_norm": 0.05793392285704613,
"learning_rate": 1.827285288342666e-05,
"loss": 0.0138,
"step": 85500
},
{
"epoch": 0.6948989568435426,
"grad_norm": 0.045407455414533615,
"learning_rate": 1.8262752607891144e-05,
"loss": 0.0133,
"step": 86000
},
{
"epoch": 0.6989390670577493,
"grad_norm": 0.12997862696647644,
"learning_rate": 1.8252652332355628e-05,
"loss": 0.0141,
"step": 86500
},
{
"epoch": 0.702979177271956,
"grad_norm": 0.07040946930646896,
"learning_rate": 1.824255205682011e-05,
"loss": 0.0138,
"step": 87000
},
{
"epoch": 0.7070192874861626,
"grad_norm": 0.05935658514499664,
"learning_rate": 1.8232451781284594e-05,
"loss": 0.0144,
"step": 87500
},
{
"epoch": 0.7110593977003693,
"grad_norm": 0.0425080843269825,
"learning_rate": 1.8222351505749078e-05,
"loss": 0.018,
"step": 88000
},
{
"epoch": 0.7150995079145759,
"grad_norm": 0.1149262934923172,
"learning_rate": 1.821225123021356e-05,
"loss": 0.0177,
"step": 88500
},
{
"epoch": 0.7191396181287826,
"grad_norm": 0.08022065460681915,
"learning_rate": 1.8202150954678045e-05,
"loss": 0.0172,
"step": 89000
},
{
"epoch": 0.7200203621554796,
"eval_f1_macro": 0.9342654788805338,
"eval_f1_micro": 0.9723695526241013,
"eval_loss": 0.09522199630737305,
"eval_precision_macro": 0.9684740304119624,
"eval_precision_micro": 0.9878875975715066,
"eval_recall_macro": 0.9056287281769387,
"eval_recall_micro": 0.9573314924580955,
"eval_runtime": 13779.2013,
"eval_samples_per_second": 2.874,
"eval_steps_per_second": 0.011,
"step": 89109
},
{
"epoch": 0.7231797283429892,
"grad_norm": 0.06360196322202682,
"learning_rate": 1.8192050679142528e-05,
"loss": 0.0167,
"step": 89500
},
{
"epoch": 0.7272198385571959,
"grad_norm": 0.0733686089515686,
"learning_rate": 1.818195040360701e-05,
"loss": 0.0179,
"step": 90000
},
{
"epoch": 0.7312599487714024,
"grad_norm": 0.1344570368528366,
"learning_rate": 1.8171850128071495e-05,
"loss": 0.0256,
"step": 90500
},
{
"epoch": 0.7353000589856091,
"grad_norm": 0.0946430116891861,
"learning_rate": 1.8161749852535978e-05,
"loss": 0.026,
"step": 91000
},
{
"epoch": 0.7393401691998158,
"grad_norm": 0.07514828443527222,
"learning_rate": 1.815164957700046e-05,
"loss": 0.0251,
"step": 91500
},
{
"epoch": 0.7433802794140224,
"grad_norm": 0.06544400006532669,
"learning_rate": 1.8141549301464945e-05,
"loss": 0.0247,
"step": 92000
},
{
"epoch": 0.7474203896282291,
"grad_norm": 0.11973392963409424,
"learning_rate": 1.8131449025929428e-05,
"loss": 0.0242,
"step": 92500
},
{
"epoch": 0.7514604998424357,
"grad_norm": 0.07870098203420639,
"learning_rate": 1.812134875039391e-05,
"loss": 0.0197,
"step": 93000
},
{
"epoch": 0.7555006100566424,
"grad_norm": 0.06315948814153671,
"learning_rate": 1.8111248474858395e-05,
"loss": 0.0189,
"step": 93500
},
{
"epoch": 0.759540720270849,
"grad_norm": 0.05281440541148186,
"learning_rate": 1.810114819932288e-05,
"loss": 0.0183,
"step": 94000
},
{
"epoch": 0.7635808304850557,
"grad_norm": 0.11212711036205292,
"learning_rate": 1.809104792378736e-05,
"loss": 0.0189,
"step": 94500
},
{
"epoch": 0.7676209406992622,
"grad_norm": 0.13350194692611694,
"learning_rate": 1.8080947648251845e-05,
"loss": 0.019,
"step": 95000
},
{
"epoch": 0.7716610509134689,
"grad_norm": 0.06391710788011551,
"learning_rate": 1.8070847372716328e-05,
"loss": 0.0176,
"step": 95500
},
{
"epoch": 0.7757011611276755,
"grad_norm": 0.06272578239440918,
"learning_rate": 1.806074709718081e-05,
"loss": 0.0175,
"step": 96000
},
{
"epoch": 0.7797412713418822,
"grad_norm": 0.10559968650341034,
"learning_rate": 1.80506468216453e-05,
"loss": 0.0166,
"step": 96500
},
{
"epoch": 0.7837813815560889,
"grad_norm": 0.10264132171869278,
"learning_rate": 1.804054654610978e-05,
"loss": 0.0167,
"step": 97000
},
{
"epoch": 0.7878214917702955,
"grad_norm": 0.06299474835395813,
"learning_rate": 1.8030446270574262e-05,
"loss": 0.0171,
"step": 97500
},
{
"epoch": 0.7918616019845022,
"grad_norm": 0.052749671041965485,
"learning_rate": 1.8020345995038745e-05,
"loss": 0.0182,
"step": 98000
},
{
"epoch": 0.7959017121987088,
"grad_norm": 0.14064335823059082,
"learning_rate": 1.801024571950323e-05,
"loss": 0.0186,
"step": 98500
},
{
"epoch": 0.7999418224129154,
"grad_norm": 0.1007775291800499,
"learning_rate": 1.8000145443967715e-05,
"loss": 0.0183,
"step": 99000
},
{
"epoch": 0.8000226246171995,
"eval_f1_macro": 0.9428325512898811,
"eval_f1_micro": 0.9730330895777283,
"eval_loss": 0.08679112046957016,
"eval_precision_macro": 0.9804326754765803,
"eval_precision_micro": 0.9888949040358451,
"eval_recall_macro": 0.9118424578887557,
"eval_recall_micro": 0.957672087213271,
"eval_runtime": 13633.7127,
"eval_samples_per_second": 2.905,
"eval_steps_per_second": 0.011,
"step": 99010
},
{
"epoch": 0.803981932627122,
"grad_norm": 0.06079207360744476,
"learning_rate": 1.7990045168432195e-05,
"loss": 0.0179,
"step": 99500
},
{
"epoch": 0.8080220428413287,
"grad_norm": 0.08171634376049042,
"learning_rate": 1.797994489289668e-05,
"loss": 0.0174,
"step": 100000
},
{
"epoch": 0.8120621530555353,
"grad_norm": 0.11801985651254654,
"learning_rate": 1.7969844617361162e-05,
"loss": 0.018,
"step": 100500
},
{
"epoch": 0.816102263269742,
"grad_norm": 0.07442731410264969,
"learning_rate": 1.7959744341825645e-05,
"loss": 0.0193,
"step": 101000
},
{
"epoch": 0.8201423734839487,
"grad_norm": 0.07479513436555862,
"learning_rate": 1.7949644066290132e-05,
"loss": 0.0182,
"step": 101500
},
{
"epoch": 0.8241824836981553,
"grad_norm": 0.07510875165462494,
"learning_rate": 1.7939543790754612e-05,
"loss": 0.0187,
"step": 102000
},
{
"epoch": 0.828222593912362,
"grad_norm": 0.12816323339939117,
"learning_rate": 1.7929443515219095e-05,
"loss": 0.018,
"step": 102500
},
{
"epoch": 0.8322627041265686,
"grad_norm": 0.1283213347196579,
"learning_rate": 1.791934323968358e-05,
"loss": 0.017,
"step": 103000
},
{
"epoch": 0.8363028143407752,
"grad_norm": 0.06121571362018585,
"learning_rate": 1.7909242964148062e-05,
"loss": 0.0169,
"step": 103500
},
{
"epoch": 0.8403429245549818,
"grad_norm": 0.05697647109627724,
"learning_rate": 1.789914268861255e-05,
"loss": 0.0165,
"step": 104000
},
{
"epoch": 0.8443830347691885,
"grad_norm": 0.12682537734508514,
"learning_rate": 1.7889042413077032e-05,
"loss": 0.0166,
"step": 104500
},
{
"epoch": 0.8484231449833951,
"grad_norm": 0.0857871025800705,
"learning_rate": 1.7878942137541512e-05,
"loss": 0.0173,
"step": 105000
},
{
"epoch": 0.8524632551976018,
"grad_norm": 0.06892874091863632,
"learning_rate": 1.7868841862005996e-05,
"loss": 0.0171,
"step": 105500
},
{
"epoch": 0.8565033654118084,
"grad_norm": 0.04709647595882416,
"learning_rate": 1.785874158647048e-05,
"loss": 0.0159,
"step": 106000
},
{
"epoch": 0.8605434756260151,
"grad_norm": 0.10291819274425507,
"learning_rate": 1.7848641310934966e-05,
"loss": 0.0165,
"step": 106500
},
{
"epoch": 0.8645835858402218,
"grad_norm": 0.0879896804690361,
"learning_rate": 1.783854103539945e-05,
"loss": 0.0162,
"step": 107000
},
{
"epoch": 0.8686236960544284,
"grad_norm": 0.06169717013835907,
"learning_rate": 1.782844075986393e-05,
"loss": 0.0158,
"step": 107500
},
{
"epoch": 0.872663806268635,
"grad_norm": 0.05489352345466614,
"learning_rate": 1.7818340484328413e-05,
"loss": 0.0165,
"step": 108000
},
{
"epoch": 0.8767039164828416,
"grad_norm": 0.14040745794773102,
"learning_rate": 1.7808240208792896e-05,
"loss": 0.0172,
"step": 108500
},
{
"epoch": 0.8800248870789195,
"eval_f1_macro": 0.9452519445016722,
"eval_f1_micro": 0.9735904566562644,
"eval_loss": 0.09658095985651016,
"eval_precision_macro": 0.9848024256701463,
"eval_precision_micro": 0.9916469998618024,
"eval_recall_macro": 0.9121362312839789,
"eval_recall_micro": 0.956179724302381,
"eval_runtime": 13866.6963,
"eval_samples_per_second": 2.856,
"eval_steps_per_second": 0.011,
"step": 108911
},
{
"epoch": 0.8807440266970483,
"grad_norm": 0.10508357733488083,
"learning_rate": 1.7798139933257383e-05,
"loss": 0.0162,
"step": 109000
},
{
"epoch": 0.8847841369112549,
"grad_norm": 0.06252790987491608,
"learning_rate": 1.7788039657721866e-05,
"loss": 0.0164,
"step": 109500
},
{
"epoch": 0.8888242471254616,
"grad_norm": 0.04974674433469772,
"learning_rate": 1.7777939382186346e-05,
"loss": 0.0165,
"step": 110000
},
{
"epoch": 0.8928643573396682,
"grad_norm": 0.11918849498033524,
"learning_rate": 1.776783910665083e-05,
"loss": 0.0174,
"step": 110500
},
{
"epoch": 0.8969044675538749,
"grad_norm": 0.12928660213947296,
"learning_rate": 1.7757738831115313e-05,
"loss": 0.017,
"step": 111000
},
{
"epoch": 0.9009445777680816,
"grad_norm": 0.06852889806032181,
"learning_rate": 1.77476385555798e-05,
"loss": 0.0163,
"step": 111500
},
{
"epoch": 0.9049846879822881,
"grad_norm": 0.0549907386302948,
"learning_rate": 1.7737538280044283e-05,
"loss": 0.0173,
"step": 112000
},
{
"epoch": 0.9090247981964948,
"grad_norm": 0.12298522889614105,
"learning_rate": 1.7727438004508763e-05,
"loss": 0.0169,
"step": 112500
},
{
"epoch": 0.9130649084107014,
"grad_norm": 0.09733408689498901,
"learning_rate": 1.7717337728973246e-05,
"loss": 0.0177,
"step": 113000
},
{
"epoch": 0.9171050186249081,
"grad_norm": 0.07251332700252533,
"learning_rate": 1.770723745343773e-05,
"loss": 0.0176,
"step": 113500
},
{
"epoch": 0.9211451288391147,
"grad_norm": 0.07106909155845642,
"learning_rate": 1.7697137177902216e-05,
"loss": 0.0174,
"step": 114000
},
{
"epoch": 0.9251852390533214,
"grad_norm": 0.1281566470861435,
"learning_rate": 1.76870369023667e-05,
"loss": 0.0176,
"step": 114500
},
{
"epoch": 0.929225349267528,
"grad_norm": 0.09204866737127304,
"learning_rate": 1.767693662683118e-05,
"loss": 0.0171,
"step": 115000
},
{
"epoch": 0.9332654594817347,
"grad_norm": 0.05850633978843689,
"learning_rate": 1.7666836351295663e-05,
"loss": 0.0151,
"step": 115500
},
{
"epoch": 0.9373055696959413,
"grad_norm": 0.044992174953222275,
"learning_rate": 1.7656736075760146e-05,
"loss": 0.0147,
"step": 116000
},
{
"epoch": 0.941345679910148,
"grad_norm": 0.10752815753221512,
"learning_rate": 1.7646635800224633e-05,
"loss": 0.0155,
"step": 116500
},
{
"epoch": 0.9453857901243546,
"grad_norm": 0.09021549671888351,
"learning_rate": 1.7636535524689117e-05,
"loss": 0.0154,
"step": 117000
},
{
"epoch": 0.9494259003385612,
"grad_norm": 0.0689893364906311,
"learning_rate": 1.7626435249153596e-05,
"loss": 0.0158,
"step": 117500
},
{
"epoch": 0.9534660105527679,
"grad_norm": 0.06845594197511673,
"learning_rate": 1.761633497361808e-05,
"loss": 0.021,
"step": 118000
},
{
"epoch": 0.9575061207669745,
"grad_norm": 0.11164365708827972,
"learning_rate": 1.7606234698082563e-05,
"loss": 0.0209,
"step": 118500
},
{
"epoch": 0.9600271495406395,
"eval_f1_macro": 0.9436502450372893,
"eval_f1_micro": 0.9732083745347168,
"eval_loss": 0.0801812931895256,
"eval_precision_macro": 0.9811968490819049,
"eval_precision_micro": 0.9878954264431127,
"eval_recall_macro": 0.9131012924150136,
"eval_recall_micro": 0.9589516303534508,
"eval_runtime": 13850.1368,
"eval_samples_per_second": 2.859,
"eval_steps_per_second": 0.011,
"step": 118812
},
{
"epoch": 0.9615462309811812,
"grad_norm": 0.0935693234205246,
"learning_rate": 1.759613442254705e-05,
"loss": 0.0202,
"step": 119000
},
{
"epoch": 0.9655863411953878,
"grad_norm": 0.06598909944295883,
"learning_rate": 1.7586034147011533e-05,
"loss": 0.02,
"step": 119500
},
{
"epoch": 0.9696264514095945,
"grad_norm": 0.052590906620025635,
"learning_rate": 1.7575933871476017e-05,
"loss": 0.0203,
"step": 120000
},
{
"epoch": 0.973666561623801,
"grad_norm": 0.1306983232498169,
"learning_rate": 1.7565833595940497e-05,
"loss": 0.0152,
"step": 120500
},
{
"epoch": 0.9777066718380077,
"grad_norm": 0.07255972176790237,
"learning_rate": 1.7555733320404983e-05,
"loss": 0.0156,
"step": 121000
},
{
"epoch": 0.9817467820522144,
"grad_norm": 0.0558183416724205,
"learning_rate": 1.7545633044869467e-05,
"loss": 0.0149,
"step": 121500
},
{
"epoch": 0.985786892266421,
"grad_norm": 0.04536261036992073,
"learning_rate": 1.753553276933395e-05,
"loss": 0.0145,
"step": 122000
},
{
"epoch": 0.9898270024806277,
"grad_norm": 0.12242696434259415,
"learning_rate": 1.7525432493798434e-05,
"loss": 0.0148,
"step": 122500
},
{
"epoch": 0.9938671126948343,
"grad_norm": 0.09054296463727951,
"learning_rate": 1.7515332218262914e-05,
"loss": 0.0184,
"step": 123000
},
{
"epoch": 0.997907222909041,
"grad_norm": 0.0703011155128479,
"learning_rate": 1.75052319427274e-05,
"loss": 0.0187,
"step": 123500
},
{
"epoch": 1.0019473331232476,
"grad_norm": 0.06889301538467407,
"learning_rate": 1.7495131667191884e-05,
"loss": 0.0186,
"step": 124000
},
{
"epoch": 1.0059874433374543,
"grad_norm": 0.1129370704293251,
"learning_rate": 1.7485031391656367e-05,
"loss": 0.0185,
"step": 124500
},
{
"epoch": 1.010027553551661,
"grad_norm": 0.0729982927441597,
"learning_rate": 1.747493111612085e-05,
"loss": 0.0181,
"step": 125000
},
{
"epoch": 1.0140676637658674,
"grad_norm": 0.19092483818531036,
"learning_rate": 1.746483084058533e-05,
"loss": 0.0167,
"step": 125500
},
{
"epoch": 1.0181077739800741,
"grad_norm": 0.04695465415716171,
"learning_rate": 1.7454730565049817e-05,
"loss": 0.0168,
"step": 126000
},
{
"epoch": 1.0221478841942808,
"grad_norm": 0.1297185719013214,
"learning_rate": 1.74446302895143e-05,
"loss": 0.0168,
"step": 126500
},
{
"epoch": 1.0261879944084875,
"grad_norm": 0.07326006889343262,
"learning_rate": 1.7434530013978784e-05,
"loss": 0.0174,
"step": 127000
},
{
"epoch": 1.0302281046226942,
"grad_norm": 0.0644180178642273,
"learning_rate": 1.7424429738443267e-05,
"loss": 0.0169,
"step": 127500
},
{
"epoch": 1.0342682148369007,
"grad_norm": 0.04816208407282829,
"learning_rate": 1.7414329462907747e-05,
"loss": 0.0146,
"step": 128000
},
{
"epoch": 1.0383083250511074,
"grad_norm": 0.09492602199316025,
"learning_rate": 1.7404229187372234e-05,
"loss": 0.0146,
"step": 128500
},
{
"epoch": 1.0400294120023594,
"eval_f1_macro": 0.9447347774982361,
"eval_f1_micro": 0.9738792089577321,
"eval_loss": 0.09039987623691559,
"eval_precision_macro": 0.9845539397176223,
"eval_precision_micro": 0.9913320028997217,
"eval_recall_macro": 0.9116629602052654,
"eval_recall_micro": 0.9570303099541577,
"eval_runtime": 13620.1841,
"eval_samples_per_second": 2.908,
"eval_steps_per_second": 0.011,
"step": 128713
},
{
"epoch": 1.042348435265314,
"grad_norm": 0.08992265909910202,
"learning_rate": 1.7394128911836717e-05,
"loss": 0.014,
"step": 129000
},
{
"epoch": 1.0463885454795208,
"grad_norm": 0.05333436280488968,
"learning_rate": 1.73840286363012e-05,
"loss": 0.015,
"step": 129500
},
{
"epoch": 1.0504286556937272,
"grad_norm": 0.057117633521556854,
"learning_rate": 1.7373928360765684e-05,
"loss": 0.0144,
"step": 130000
},
{
"epoch": 1.054468765907934,
"grad_norm": 0.12276995927095413,
"learning_rate": 1.7363828085230164e-05,
"loss": 0.0228,
"step": 130500
},
{
"epoch": 1.0585088761221406,
"grad_norm": 0.08618568629026413,
"learning_rate": 1.735372780969465e-05,
"loss": 0.0229,
"step": 131000
},
{
"epoch": 1.0625489863363473,
"grad_norm": 0.08783124387264252,
"learning_rate": 1.7343627534159134e-05,
"loss": 0.0222,
"step": 131500
},
{
"epoch": 1.066589096550554,
"grad_norm": 0.06352981925010681,
"learning_rate": 1.7333527258623618e-05,
"loss": 0.0221,
"step": 132000
},
{
"epoch": 1.0706292067647605,
"grad_norm": 0.10115523636341095,
"learning_rate": 1.73234269830881e-05,
"loss": 0.0226,
"step": 132500
},
{
"epoch": 1.0746693169789672,
"grad_norm": 0.11306885629892349,
"learning_rate": 1.731332670755258e-05,
"loss": 0.0162,
"step": 133000
},
{
"epoch": 1.0787094271931739,
"grad_norm": 0.05852317065000534,
"learning_rate": 1.7303226432017068e-05,
"loss": 0.0155,
"step": 133500
},
{
"epoch": 1.0827495374073806,
"grad_norm": 0.046473681926727295,
"learning_rate": 1.729312615648155e-05,
"loss": 0.0149,
"step": 134000
},
{
"epoch": 1.086789647621587,
"grad_norm": 0.11023978888988495,
"learning_rate": 1.7283025880946034e-05,
"loss": 0.0152,
"step": 134500
},
{
"epoch": 1.0908297578357937,
"grad_norm": 0.07801781594753265,
"learning_rate": 1.7272925605410518e-05,
"loss": 0.0145,
"step": 135000
},
{
"epoch": 1.0948698680500004,
"grad_norm": 0.057179663330316544,
"learning_rate": 1.7262825329875e-05,
"loss": 0.0167,
"step": 135500
},
{
"epoch": 1.098909978264207,
"grad_norm": 0.0559101440012455,
"learning_rate": 1.7252725054339484e-05,
"loss": 0.0174,
"step": 136000
},
{
"epoch": 1.1029500884784136,
"grad_norm": 0.08359610289335251,
"learning_rate": 1.7242624778803968e-05,
"loss": 0.0171,
"step": 136500
},
{
"epoch": 1.1069901986926203,
"grad_norm": 0.11296004056930542,
"learning_rate": 1.723252450326845e-05,
"loss": 0.0172,
"step": 137000
},
{
"epoch": 1.111030308906827,
"grad_norm": 0.061936188489198685,
"learning_rate": 1.7222424227732935e-05,
"loss": 0.0173,
"step": 137500
},
{
"epoch": 1.1150704191210337,
"grad_norm": 0.07334394752979279,
"learning_rate": 1.7212323952197418e-05,
"loss": 0.0171,
"step": 138000
},
{
"epoch": 1.1191105293352404,
"grad_norm": 0.10479886829853058,
"learning_rate": 1.72022236766619e-05,
"loss": 0.0167,
"step": 138500
},
{
"epoch": 1.1200316744640795,
"eval_f1_macro": 0.9448181357644012,
"eval_f1_micro": 0.9737656699358889,
"eval_loss": 0.07901577651500702,
"eval_precision_macro": 0.9836772488732869,
"eval_precision_micro": 0.9897679811194957,
"eval_recall_macro": 0.9127768967177174,
"eval_recall_micro": 0.9582725683902614,
"eval_runtime": 13749.2598,
"eval_samples_per_second": 2.88,
"eval_steps_per_second": 0.011,
"step": 138614
},
{
"epoch": 1.1231506395494468,
"grad_norm": 0.07909916341304779,
"learning_rate": 1.7192123401126385e-05,
"loss": 0.0161,
"step": 139000
},
{
"epoch": 1.1271907497636535,
"grad_norm": 0.08238150179386139,
"learning_rate": 1.7182023125590868e-05,
"loss": 0.017,
"step": 139500
},
{
"epoch": 1.1312308599778602,
"grad_norm": 0.06267368793487549,
"learning_rate": 1.717192285005535e-05,
"loss": 0.0164,
"step": 140000
},
{
"epoch": 1.135270970192067,
"grad_norm": 0.11608216911554337,
"learning_rate": 1.7161822574519835e-05,
"loss": 0.0165,
"step": 140500
},
{
"epoch": 1.1393110804062734,
"grad_norm": 0.10431836545467377,
"learning_rate": 1.7151722298984318e-05,
"loss": 0.0161,
"step": 141000
},
{
"epoch": 1.14335119062048,
"grad_norm": 0.06495651602745056,
"learning_rate": 1.71416220234488e-05,
"loss": 0.0165,
"step": 141500
},
{
"epoch": 1.1473913008346868,
"grad_norm": 0.04861852526664734,
"learning_rate": 1.7131521747913285e-05,
"loss": 0.0163,
"step": 142000
},
{
"epoch": 1.1514314110488935,
"grad_norm": 0.17824631929397583,
"learning_rate": 1.7121421472377768e-05,
"loss": 0.0159,
"step": 142500
},
{
"epoch": 1.1554715212631002,
"grad_norm": 0.08877791464328766,
"learning_rate": 1.711132119684225e-05,
"loss": 0.0171,
"step": 143000
},
{
"epoch": 1.1595116314773066,
"grad_norm": 0.06289026886224747,
"learning_rate": 1.7101220921306735e-05,
"loss": 0.0166,
"step": 143500
},
{
"epoch": 1.1635517416915133,
"grad_norm": 0.0498519092798233,
"learning_rate": 1.709112064577122e-05,
"loss": 0.0169,
"step": 144000
},
{
"epoch": 1.16759185190572,
"grad_norm": 0.13069184124469757,
"learning_rate": 1.7081020370235702e-05,
"loss": 0.0168,
"step": 144500
},
{
"epoch": 1.1716319621199267,
"grad_norm": 0.09042539447546005,
"learning_rate": 1.7070920094700185e-05,
"loss": 0.0168,
"step": 145000
},
{
"epoch": 1.1756720723341332,
"grad_norm": 0.05690092593431473,
"learning_rate": 1.706081981916467e-05,
"loss": 0.0166,
"step": 145500
},
{
"epoch": 1.1797121825483399,
"grad_norm": 0.0493723563849926,
"learning_rate": 1.7050719543629152e-05,
"loss": 0.017,
"step": 146000
},
{
"epoch": 1.1837522927625466,
"grad_norm": 0.10125371068716049,
"learning_rate": 1.7040619268093635e-05,
"loss": 0.0165,
"step": 146500
},
{
"epoch": 1.1877924029767533,
"grad_norm": 0.0926498994231224,
"learning_rate": 1.703051899255812e-05,
"loss": 0.0163,
"step": 147000
},
{
"epoch": 1.19183251319096,
"grad_norm": 0.06617089360952377,
"learning_rate": 1.7020418717022602e-05,
"loss": 0.0168,
"step": 147500
},
{
"epoch": 1.1958726234051664,
"grad_norm": 0.05541488900780678,
"learning_rate": 1.7010318441487085e-05,
"loss": 0.0192,
"step": 148000
},
{
"epoch": 1.1999127336193731,
"grad_norm": 0.12656770646572113,
"learning_rate": 1.700021816595157e-05,
"loss": 0.0193,
"step": 148500
},
{
"epoch": 1.2000339369257993,
"eval_f1_macro": 0.9460294234862895,
"eval_f1_micro": 0.9741125567825796,
"eval_loss": 0.08067350834608078,
"eval_precision_macro": 0.986194906743568,
"eval_precision_micro": 0.9906935766072956,
"eval_recall_macro": 0.9129872709919727,
"eval_recall_micro": 0.9580774262702744,
"eval_runtime": 13826.0506,
"eval_samples_per_second": 2.864,
"eval_steps_per_second": 0.011,
"step": 148515
},
{
"epoch": 1.2039528438335798,
"grad_norm": 0.10946424305438995,
"learning_rate": 1.6990117890416052e-05,
"loss": 0.019,
"step": 149000
},
{
"epoch": 1.2079929540477865,
"grad_norm": 0.05134887993335724,
"learning_rate": 1.6980017614880535e-05,
"loss": 0.0177,
"step": 149500
},
{
"epoch": 1.212033064261993,
"grad_norm": 0.08791927248239517,
"learning_rate": 1.696991733934502e-05,
"loss": 0.0188,
"step": 150000
},
{
"epoch": 1.2160731744761997,
"grad_norm": 0.11116321384906769,
"learning_rate": 1.6959817063809502e-05,
"loss": 0.014,
"step": 150500
},
{
"epoch": 1.2201132846904064,
"grad_norm": 0.07135743647813797,
"learning_rate": 1.6949716788273986e-05,
"loss": 0.0133,
"step": 151000
},
{
"epoch": 1.224153394904613,
"grad_norm": 0.06051028147339821,
"learning_rate": 1.693961651273847e-05,
"loss": 0.014,
"step": 151500
},
{
"epoch": 1.2281935051188198,
"grad_norm": 0.05637380853295326,
"learning_rate": 1.6929516237202952e-05,
"loss": 0.0136,
"step": 152000
},
{
"epoch": 1.2322336153330262,
"grad_norm": 0.2139320969581604,
"learning_rate": 1.6919415961667436e-05,
"loss": 0.014,
"step": 152500
},
{
"epoch": 1.236273725547233,
"grad_norm": 0.10385521501302719,
"learning_rate": 1.690931568613192e-05,
"loss": 0.0136,
"step": 153000
},
{
"epoch": 1.2403138357614396,
"grad_norm": 0.052428074181079865,
"learning_rate": 1.6899215410596402e-05,
"loss": 0.0143,
"step": 153500
},
{
"epoch": 1.2443539459756463,
"grad_norm": 0.0810508131980896,
"learning_rate": 1.6889115135060886e-05,
"loss": 0.0136,
"step": 154000
},
{
"epoch": 1.2483940561898528,
"grad_norm": 0.1127280592918396,
"learning_rate": 1.687901485952537e-05,
"loss": 0.0131,
"step": 154500
},
{
"epoch": 1.2524341664040595,
"grad_norm": 0.0869458019733429,
"learning_rate": 1.6868914583989852e-05,
"loss": 0.0134,
"step": 155000
},
{
"epoch": 1.2564742766182662,
"grad_norm": 0.055589742958545685,
"learning_rate": 1.6858814308454336e-05,
"loss": 0.0131,
"step": 155500
},
{
"epoch": 1.2605143868324729,
"grad_norm": 0.07655055820941925,
"learning_rate": 1.684871403291882e-05,
"loss": 0.0133,
"step": 156000
},
{
"epoch": 1.2645544970466793,
"grad_norm": 0.10124019533395767,
"learning_rate": 1.6838613757383303e-05,
"loss": 0.0134,
"step": 156500
},
{
"epoch": 1.268594607260886,
"grad_norm": 0.06868778169155121,
"learning_rate": 1.6828513481847786e-05,
"loss": 0.0131,
"step": 157000
},
{
"epoch": 1.2726347174750927,
"grad_norm": 0.05508118122816086,
"learning_rate": 1.681841320631227e-05,
"loss": 0.013,
"step": 157500
},
{
"epoch": 1.2766748276892994,
"grad_norm": 0.061807744204998016,
"learning_rate": 1.6808312930776753e-05,
"loss": 0.0165,
"step": 158000
},
{
"epoch": 1.2800361993875193,
"eval_f1_macro": 0.9468881762987015,
"eval_f1_micro": 0.974322016191991,
"eval_loss": 0.08317849040031433,
"eval_precision_macro": 0.9867617789603411,
"eval_precision_micro": 0.9924250039485718,
"eval_recall_macro": 0.9135532474499787,
"eval_recall_micro": 0.9568676362293653,
"eval_runtime": 13523.4341,
"eval_samples_per_second": 2.928,
"eval_steps_per_second": 0.011,
"step": 158416
},
{
"epoch": 1.280714937903506,
"grad_norm": 0.14820145070552826,
"learning_rate": 1.6798212655241236e-05,
"loss": 0.0154,
"step": 158500
},
{
"epoch": 1.2847550481177126,
"grad_norm": 0.066920705139637,
"learning_rate": 1.678811237970572e-05,
"loss": 0.0165,
"step": 159000
},
{
"epoch": 1.2887951583319193,
"grad_norm": 0.05135662853717804,
"learning_rate": 1.6778012104170203e-05,
"loss": 0.0157,
"step": 159500
},
{
"epoch": 1.292835268546126,
"grad_norm": 0.0481293685734272,
"learning_rate": 1.6767911828634686e-05,
"loss": 0.0157,
"step": 160000
},
{
"epoch": 1.2968753787603327,
"grad_norm": 0.11119942367076874,
"learning_rate": 1.675781155309917e-05,
"loss": 0.0175,
"step": 160500
},
{
"epoch": 1.3009154889745391,
"grad_norm": 0.10568433254957199,
"learning_rate": 1.6747711277563653e-05,
"loss": 0.0195,
"step": 161000
},
{
"epoch": 1.3049555991887458,
"grad_norm": 0.070424385368824,
"learning_rate": 1.6737611002028136e-05,
"loss": 0.0187,
"step": 161500
},
{
"epoch": 1.3089957094029525,
"grad_norm": 0.055738966912031174,
"learning_rate": 1.672751072649262e-05,
"loss": 0.0178,
"step": 162000
},
{
"epoch": 1.3130358196171592,
"grad_norm": 0.13051150739192963,
"learning_rate": 1.6717410450957103e-05,
"loss": 0.0184,
"step": 162500
},
{
"epoch": 1.317075929831366,
"grad_norm": 0.07910241186618805,
"learning_rate": 1.6707310175421586e-05,
"loss": 0.0155,
"step": 163000
},
{
"epoch": 1.3211160400455724,
"grad_norm": 0.15667231380939484,
"learning_rate": 1.669720989988607e-05,
"loss": 0.0156,
"step": 163500
},
{
"epoch": 1.325156150259779,
"grad_norm": 0.1987818032503128,
"learning_rate": 1.6687109624350553e-05,
"loss": 0.0152,
"step": 164000
},
{
"epoch": 1.3291962604739858,
"grad_norm": 0.13924378156661987,
"learning_rate": 1.6677009348815036e-05,
"loss": 0.0149,
"step": 164500
},
{
"epoch": 1.3332363706881925,
"grad_norm": 0.07680565118789673,
"learning_rate": 1.666690907327952e-05,
"loss": 0.0152,
"step": 165000
},
{
"epoch": 1.337276480902399,
"grad_norm": 0.10616718977689743,
"learning_rate": 1.6656808797744003e-05,
"loss": 0.0248,
"step": 165500
},
{
"epoch": 1.3413165911166056,
"grad_norm": 0.14228446781635284,
"learning_rate": 1.6646708522208487e-05,
"loss": 0.0255,
"step": 166000
},
{
"epoch": 1.3453567013308123,
"grad_norm": 0.12593576312065125,
"learning_rate": 1.6636608246672973e-05,
"loss": 0.0249,
"step": 166500
},
{
"epoch": 1.349396811545019,
"grad_norm": 0.14932659268379211,
"learning_rate": 1.6626507971137453e-05,
"loss": 0.0253,
"step": 167000
},
{
"epoch": 1.3534369217592257,
"grad_norm": 0.09529467672109604,
"learning_rate": 1.6616407695601937e-05,
"loss": 0.0248,
"step": 167500
},
{
"epoch": 1.3574770319734322,
"grad_norm": 0.048431217670440674,
"learning_rate": 1.660630742006642e-05,
"loss": 0.0151,
"step": 168000
},
{
"epoch": 1.3600384618492392,
"eval_f1_macro": 0.9470943315331984,
"eval_f1_micro": 0.9744685617640599,
"eval_loss": 0.08101344108581543,
"eval_precision_macro": 0.9875637466039148,
"eval_precision_micro": 0.9922827909185198,
"eval_recall_macro": 0.9134025250498142,
"eval_recall_micro": 0.957282681677187,
"eval_runtime": 13286.0274,
"eval_samples_per_second": 2.981,
"eval_steps_per_second": 0.012,
"step": 168317
},
{
"epoch": 1.3615171421876389,
"grad_norm": 0.10621971637010574,
"learning_rate": 1.6596207144530903e-05,
"loss": 0.0152,
"step": 168500
},
{
"epoch": 1.3655572524018456,
"grad_norm": 0.07011255621910095,
"learning_rate": 1.658610686899539e-05,
"loss": 0.015,
"step": 169000
},
{
"epoch": 1.3695973626160522,
"grad_norm": 0.05363575369119644,
"learning_rate": 1.657600659345987e-05,
"loss": 0.0148,
"step": 169500
},
{
"epoch": 1.3736374728302587,
"grad_norm": 0.14870333671569824,
"learning_rate": 1.6565906317924354e-05,
"loss": 0.0148,
"step": 170000
},
{
"epoch": 1.3776775830444654,
"grad_norm": 0.11409811675548553,
"learning_rate": 1.6555806042388837e-05,
"loss": 0.0158,
"step": 170500
},
{
"epoch": 1.381717693258672,
"grad_norm": 0.11164900660514832,
"learning_rate": 1.654570576685332e-05,
"loss": 0.0148,
"step": 171000
},
{
"epoch": 1.3857578034728788,
"grad_norm": 0.08794820308685303,
"learning_rate": 1.6535605491317807e-05,
"loss": 0.0158,
"step": 171500
},
{
"epoch": 1.3897979136870853,
"grad_norm": 0.060815006494522095,
"learning_rate": 1.6525505215782287e-05,
"loss": 0.0145,
"step": 172000
},
{
"epoch": 1.393838023901292,
"grad_norm": 0.12906509637832642,
"learning_rate": 1.651540494024677e-05,
"loss": 0.0153,
"step": 172500
},
{
"epoch": 1.3978781341154987,
"grad_norm": 0.09560517966747284,
"learning_rate": 1.6505304664711254e-05,
"loss": 0.026,
"step": 173000
},
{
"epoch": 1.4019182443297054,
"grad_norm": 0.05908598750829697,
"learning_rate": 1.6495204389175737e-05,
"loss": 0.0253,
"step": 173500
},
{
"epoch": 1.405958354543912,
"grad_norm": 0.06017552688717842,
"learning_rate": 1.6485104113640224e-05,
"loss": 0.0248,
"step": 174000
},
{
"epoch": 1.4099984647581185,
"grad_norm": 0.10513614118099213,
"learning_rate": 1.6475003838104704e-05,
"loss": 0.0243,
"step": 174500
},
{
"epoch": 1.4140385749723252,
"grad_norm": 0.08137038350105286,
"learning_rate": 1.6464903562569187e-05,
"loss": 0.0243,
"step": 175000
},
{
"epoch": 1.418078685186532,
"grad_norm": 0.07494989782571793,
"learning_rate": 1.645480328703367e-05,
"loss": 0.0202,
"step": 175500
},
{
"epoch": 1.4221187954007386,
"grad_norm": 0.05562291666865349,
"learning_rate": 1.6444703011498154e-05,
"loss": 0.0204,
"step": 176000
},
{
"epoch": 1.426158905614945,
"grad_norm": 0.11044422537088394,
"learning_rate": 1.643460273596264e-05,
"loss": 0.0202,
"step": 176500
},
{
"epoch": 1.4301990158291518,
"grad_norm": 0.11972752958536148,
"learning_rate": 1.642450246042712e-05,
"loss": 0.0195,
"step": 177000
},
{
"epoch": 1.4342391260433585,
"grad_norm": 0.06898529082536697,
"learning_rate": 1.6414402184891604e-05,
"loss": 0.0203,
"step": 177500
},
{
"epoch": 1.4382792362575652,
"grad_norm": 0.05580909922719002,
"learning_rate": 1.6404301909356087e-05,
"loss": 0.0124,
"step": 178000
},
{
"epoch": 1.4400407243109592,
"eval_f1_macro": 0.9458974211933513,
"eval_f1_micro": 0.974213850978252,
"eval_loss": 0.09569641947746277,
"eval_precision_macro": 0.9869461304954816,
"eval_precision_micro": 0.9919091180407337,
"eval_recall_macro": 0.9122157060173365,
"eval_recall_micro": 0.9571388713888294,
"eval_runtime": 13113.6746,
"eval_samples_per_second": 3.02,
"eval_steps_per_second": 0.012,
"step": 178218
},
{
"epoch": 1.4423193464717718,
"grad_norm": 0.09399819374084473,
"learning_rate": 1.639420163382057e-05,
"loss": 0.0116,
"step": 178500
},
{
"epoch": 1.4463594566859783,
"grad_norm": 0.06601426005363464,
"learning_rate": 1.6384101358285058e-05,
"loss": 0.0117,
"step": 179000
},
{
"epoch": 1.450399566900185,
"grad_norm": 0.11237422376871109,
"learning_rate": 1.6374001082749538e-05,
"loss": 0.0115,
"step": 179500
},
{
"epoch": 1.4544396771143917,
"grad_norm": 0.04262951388955116,
"learning_rate": 1.636390080721402e-05,
"loss": 0.0112,
"step": 180000
},
{
"epoch": 1.4584797873285984,
"grad_norm": 0.13000500202178955,
"learning_rate": 1.6353800531678504e-05,
"loss": 0.0132,
"step": 180500
},
{
"epoch": 1.4625198975428049,
"grad_norm": 0.0949823409318924,
"learning_rate": 1.6343700256142988e-05,
"loss": 0.0129,
"step": 181000
},
{
"epoch": 1.4665600077570116,
"grad_norm": 0.04730290174484253,
"learning_rate": 1.6333599980607474e-05,
"loss": 0.0129,
"step": 181500
},
{
"epoch": 1.4706001179712183,
"grad_norm": 0.050584714859724045,
"learning_rate": 1.6323499705071958e-05,
"loss": 0.013,
"step": 182000
},
{
"epoch": 1.474640228185425,
"grad_norm": 0.1683996021747589,
"learning_rate": 1.6313399429536438e-05,
"loss": 0.0133,
"step": 182500
},
{
"epoch": 1.4786803383996316,
"grad_norm": 0.1036485880613327,
"learning_rate": 1.630329915400092e-05,
"loss": 0.0136,
"step": 183000
},
{
"epoch": 1.4827204486138381,
"grad_norm": 0.11697889119386673,
"learning_rate": 1.6293198878465404e-05,
"loss": 0.0133,
"step": 183500
},
{
"epoch": 1.4867605588280448,
"grad_norm": 0.0688479095697403,
"learning_rate": 1.628309860292989e-05,
"loss": 0.0132,
"step": 184000
},
{
"epoch": 1.4908006690422515,
"grad_norm": 0.12002038955688477,
"learning_rate": 1.6272998327394375e-05,
"loss": 0.0131,
"step": 184500
},
{
"epoch": 1.4948407792564582,
"grad_norm": 0.08021160215139389,
"learning_rate": 1.6262898051858855e-05,
"loss": 0.0133,
"step": 185000
},
{
"epoch": 1.4988808894706647,
"grad_norm": 0.07343757152557373,
"learning_rate": 1.6252797776323338e-05,
"loss": 0.0135,
"step": 185500
},
{
"epoch": 1.5029209996848714,
"grad_norm": 0.058117810636758804,
"learning_rate": 1.624269750078782e-05,
"loss": 0.0143,
"step": 186000
},
{
"epoch": 1.506961109899078,
"grad_norm": 0.10462002456188202,
"learning_rate": 1.6232597225252308e-05,
"loss": 0.0138,
"step": 186500
},
{
"epoch": 1.5110012201132847,
"grad_norm": 0.07825891673564911,
"learning_rate": 1.622249694971679e-05,
"loss": 0.0141,
"step": 187000
},
{
"epoch": 1.5150413303274914,
"grad_norm": 0.05809338763356209,
"learning_rate": 1.621239667418127e-05,
"loss": 0.0136,
"step": 187500
},
{
"epoch": 1.519081440541698,
"grad_norm": 0.05035299435257912,
"learning_rate": 1.6202296398645755e-05,
"loss": 0.0165,
"step": 188000
},
{
"epoch": 1.5200429867726792,
"eval_f1_macro": 0.9465345293457039,
"eval_f1_micro": 0.9742553945189574,
"eval_loss": 0.08490300178527832,
"eval_precision_macro": 0.9844654624463276,
"eval_precision_micro": 0.9898738168824952,
"eval_recall_macro": 0.9150699179615246,
"eval_recall_micro": 0.9591221775256943,
"eval_runtime": 13330.8537,
"eval_samples_per_second": 2.971,
"eval_steps_per_second": 0.012,
"step": 188119
},
{
"epoch": 1.5231215507559046,
"grad_norm": 0.14376361668109894,
"learning_rate": 1.6192196123110238e-05,
"loss": 0.0157,
"step": 188500
},
{
"epoch": 1.5271616609701113,
"grad_norm": 0.07897575944662094,
"learning_rate": 1.6182095847574725e-05,
"loss": 0.0159,
"step": 189000
},
{
"epoch": 1.531201771184318,
"grad_norm": 0.06912536919116974,
"learning_rate": 1.6171995572039208e-05,
"loss": 0.0163,
"step": 189500
},
{
"epoch": 1.5352418813985245,
"grad_norm": 0.05066482350230217,
"learning_rate": 1.6161895296503688e-05,
"loss": 0.0156,
"step": 190000
},
{
"epoch": 1.5392819916127312,
"grad_norm": 0.14292369782924652,
"learning_rate": 1.615179502096817e-05,
"loss": 0.0198,
"step": 190500
},
{
"epoch": 1.5433221018269379,
"grad_norm": 0.08798356354236603,
"learning_rate": 1.614169474543266e-05,
"loss": 0.0197,
"step": 191000
},
{
"epoch": 1.5473622120411445,
"grad_norm": 0.061990030109882355,
"learning_rate": 1.6131594469897142e-05,
"loss": 0.0183,
"step": 191500
},
{
"epoch": 1.551402322255351,
"grad_norm": 0.05433070659637451,
"learning_rate": 1.6121494194361625e-05,
"loss": 0.0183,
"step": 192000
},
{
"epoch": 1.5554424324695577,
"grad_norm": 0.13680632412433624,
"learning_rate": 1.6111393918826105e-05,
"loss": 0.0192,
"step": 192500
},
{
"epoch": 1.5594825426837644,
"grad_norm": 0.1941196620464325,
"learning_rate": 1.610129364329059e-05,
"loss": 0.0176,
"step": 193000
},
{
"epoch": 1.563522652897971,
"grad_norm": 0.08578658103942871,
"learning_rate": 1.6091193367755075e-05,
"loss": 0.0173,
"step": 193500
},
{
"epoch": 1.5675627631121778,
"grad_norm": 0.04361563175916672,
"learning_rate": 1.608109309221956e-05,
"loss": 0.0171,
"step": 194000
},
{
"epoch": 1.5716028733263843,
"grad_norm": 0.12448256462812424,
"learning_rate": 1.6070992816684042e-05,
"loss": 0.0168,
"step": 194500
},
{
"epoch": 1.575642983540591,
"grad_norm": 0.10221997648477554,
"learning_rate": 1.6060892541148522e-05,
"loss": 0.017,
"step": 195000
},
{
"epoch": 1.5796830937547977,
"grad_norm": 0.07009778171777725,
"learning_rate": 1.6050792265613005e-05,
"loss": 0.0175,
"step": 195500
},
{
"epoch": 1.5837232039690043,
"grad_norm": 0.06714298576116562,
"learning_rate": 1.6040691990077492e-05,
"loss": 0.0174,
"step": 196000
},
{
"epoch": 1.5877633141832108,
"grad_norm": 0.12766534090042114,
"learning_rate": 1.6030591714541975e-05,
"loss": 0.0179,
"step": 196500
},
{
"epoch": 1.5918034243974175,
"grad_norm": 0.10328399389982224,
"learning_rate": 1.602049143900646e-05,
"loss": 0.0175,
"step": 197000
},
{
"epoch": 1.5958435346116242,
"grad_norm": 0.09311484545469284,
"learning_rate": 1.601039116347094e-05,
"loss": 0.0172,
"step": 197500
},
{
"epoch": 1.599883644825831,
"grad_norm": 0.08157425373792648,
"learning_rate": 1.6000290887935422e-05,
"loss": 0.0171,
"step": 198000
},
{
"epoch": 1.600045249234399,
"eval_f1_macro": 0.9462988853572672,
"eval_f1_micro": 0.9743141624468545,
"eval_loss": 0.09824506938457489,
"eval_precision_macro": 0.9878799683485701,
"eval_precision_micro": 0.9928597658940401,
"eval_recall_macro": 0.9118503420886092,
"eval_recall_micro": 0.9564486807740753,
"eval_runtime": 13226.8299,
"eval_samples_per_second": 2.994,
"eval_steps_per_second": 0.012,
"step": 198020
},
{
"epoch": 1.6039237550400376,
"grad_norm": 0.10958320647478104,
"learning_rate": 1.599019061239991e-05,
"loss": 0.0167,
"step": 198500
},
{
"epoch": 1.607963865254244,
"grad_norm": 0.07280286401510239,
"learning_rate": 1.5980090336864392e-05,
"loss": 0.0164,
"step": 199000
},
{
"epoch": 1.6120039754684508,
"grad_norm": 0.0816897377371788,
"learning_rate": 1.5969990061328876e-05,
"loss": 0.017,
"step": 199500
},
{
"epoch": 1.6160440856826574,
"grad_norm": 0.046233151108026505,
"learning_rate": 1.595988978579336e-05,
"loss": 0.0163,
"step": 200000
},
{
"epoch": 1.6200841958968641,
"grad_norm": 0.13440461456775665,
"learning_rate": 1.594978951025784e-05,
"loss": 0.015,
"step": 200500
},
{
"epoch": 1.6241243061110706,
"grad_norm": 0.0861237496137619,
"learning_rate": 1.5939689234722326e-05,
"loss": 0.0146,
"step": 201000
},
{
"epoch": 1.6281644163252773,
"grad_norm": 0.06643826514482498,
"learning_rate": 1.592958895918681e-05,
"loss": 0.0142,
"step": 201500
},
{
"epoch": 1.632204526539484,
"grad_norm": 0.06138383969664574,
"learning_rate": 1.5919488683651292e-05,
"loss": 0.0143,
"step": 202000
},
{
"epoch": 1.6362446367536907,
"grad_norm": 0.13212205469608307,
"learning_rate": 1.5909388408115776e-05,
"loss": 0.0147,
"step": 202500
},
{
"epoch": 1.6402847469678974,
"grad_norm": 0.07676049321889877,
"learning_rate": 1.5899288132580256e-05,
"loss": 0.0151,
"step": 203000
},
{
"epoch": 1.6443248571821039,
"grad_norm": 0.10008609294891357,
"learning_rate": 1.5889187857044743e-05,
"loss": 0.0152,
"step": 203500
},
{
"epoch": 1.6483649673963106,
"grad_norm": 0.04750071465969086,
"learning_rate": 1.5879087581509226e-05,
"loss": 0.0157,
"step": 204000
},
{
"epoch": 1.6524050776105172,
"grad_norm": 0.11740187555551529,
"learning_rate": 1.586898730597371e-05,
"loss": 0.0155,
"step": 204500
},
{
"epoch": 1.656445187824724,
"grad_norm": 0.06920389086008072,
"learning_rate": 1.5858887030438193e-05,
"loss": 0.0156,
"step": 205000
},
{
"epoch": 1.6604852980389304,
"grad_norm": 0.05165468528866768,
"learning_rate": 1.5848786754902673e-05,
"loss": 0.0151,
"step": 205500
},
{
"epoch": 1.664525408253137,
"grad_norm": 0.07880023121833801,
"learning_rate": 1.583868647936716e-05,
"loss": 0.0152,
"step": 206000
},
{
"epoch": 1.6685655184673438,
"grad_norm": 0.11061804741621017,
"learning_rate": 1.5828586203831643e-05,
"loss": 0.015,
"step": 206500
},
{
"epoch": 1.6726056286815505,
"grad_norm": 0.08423452824354172,
"learning_rate": 1.5818485928296126e-05,
"loss": 0.015,
"step": 207000
},
{
"epoch": 1.6766457388957572,
"grad_norm": 0.07225336134433746,
"learning_rate": 1.580838565276061e-05,
"loss": 0.0148,
"step": 207500
},
{
"epoch": 1.680047511696119,
"eval_f1_macro": 0.9464641577937303,
"eval_f1_micro": 0.9740978113062491,
"eval_loss": 0.08534455299377441,
"eval_precision_macro": 0.9875234035936792,
"eval_precision_micro": 0.9923821627163134,
"eval_recall_macro": 0.9123021051137999,
"eval_recall_micro": 0.9564750381751005,
"eval_runtime": 12543.2073,
"eval_samples_per_second": 3.157,
"eval_steps_per_second": 0.012,
"step": 207921
},
{
"epoch": 1.6806858491099637,
"grad_norm": 0.05870038643479347,
"learning_rate": 1.579828537722509e-05,
"loss": 0.0197,
"step": 208000
},
{
"epoch": 1.6847259593241704,
"grad_norm": 0.11144687980413437,
"learning_rate": 1.5788185101689576e-05,
"loss": 0.0197,
"step": 208500
},
{
"epoch": 1.688766069538377,
"grad_norm": 0.07475966960191727,
"learning_rate": 1.577808482615406e-05,
"loss": 0.0195,
"step": 209000
},
{
"epoch": 1.6928061797525837,
"grad_norm": 0.05573410540819168,
"learning_rate": 1.5767984550618543e-05,
"loss": 0.0202,
"step": 209500
},
{
"epoch": 1.6968462899667902,
"grad_norm": 0.07953529059886932,
"learning_rate": 1.5757884275083026e-05,
"loss": 0.0191,
"step": 210000
},
{
"epoch": 1.700886400180997,
"grad_norm": 0.08590356260538101,
"learning_rate": 1.5747783999547506e-05,
"loss": 0.0147,
"step": 210500
},
{
"epoch": 1.7049265103952036,
"grad_norm": 0.08645664155483246,
"learning_rate": 1.5737683724011993e-05,
"loss": 0.0145,
"step": 211000
},
{
"epoch": 1.7089666206094103,
"grad_norm": 0.059178948402404785,
"learning_rate": 1.5727583448476476e-05,
"loss": 0.0139,
"step": 211500
},
{
"epoch": 1.7130067308236168,
"grad_norm": 0.05445469170808792,
"learning_rate": 1.571748317294096e-05,
"loss": 0.0143,
"step": 212000
},
{
"epoch": 1.7170468410378235,
"grad_norm": 0.10709578543901443,
"learning_rate": 1.5707382897405443e-05,
"loss": 0.0141,
"step": 212500
},
{
"epoch": 1.7210869512520302,
"grad_norm": 0.0663144662976265,
"learning_rate": 1.5697282621869923e-05,
"loss": 0.0121,
"step": 213000
},
{
"epoch": 1.7251270614662368,
"grad_norm": 0.0667869821190834,
"learning_rate": 1.568718234633441e-05,
"loss": 0.0125,
"step": 213500
},
{
"epoch": 1.7291671716804435,
"grad_norm": 0.09561540186405182,
"learning_rate": 1.5677082070798893e-05,
"loss": 0.0122,
"step": 214000
},
{
"epoch": 1.73320728189465,
"grad_norm": 0.09017562866210938,
"learning_rate": 1.5666981795263377e-05,
"loss": 0.0128,
"step": 214500
},
{
"epoch": 1.7372473921088567,
"grad_norm": 0.06796102970838547,
"learning_rate": 1.565688151972786e-05,
"loss": 0.0129,
"step": 215000
},
{
"epoch": 1.7412875023230634,
"grad_norm": 0.06975946575403214,
"learning_rate": 1.5646781244192343e-05,
"loss": 0.0126,
"step": 215500
},
{
"epoch": 1.74532761253727,
"grad_norm": 0.04627285152673721,
"learning_rate": 1.5636680968656827e-05,
"loss": 0.0126,
"step": 216000
},
{
"epoch": 1.7493677227514766,
"grad_norm": 0.12213249504566193,
"learning_rate": 1.562658069312131e-05,
"loss": 0.0124,
"step": 216500
},
{
"epoch": 1.7534078329656833,
"grad_norm": 0.0799461305141449,
"learning_rate": 1.5616480417585793e-05,
"loss": 0.0125,
"step": 217000
},
{
"epoch": 1.75744794317989,
"grad_norm": 0.05975542962551117,
"learning_rate": 1.5606380142050277e-05,
"loss": 0.0124,
"step": 217500
},
{
"epoch": 1.760049774157839,
"eval_f1_macro": 0.9468023383313312,
"eval_f1_micro": 0.9742265323429071,
"eval_loss": 0.08699483424425125,
"eval_precision_macro": 0.9835541832835969,
"eval_precision_micro": 0.9902263876070855,
"eval_recall_macro": 0.915890529699618,
"eval_recall_micro": 0.9587354997620732,
"eval_runtime": 12459.0163,
"eval_samples_per_second": 3.179,
"eval_steps_per_second": 0.012,
"step": 217822
}
],
"logging_steps": 500,
"max_steps": 990072,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 9901,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.468409508314638e+19,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}