|
{
  "best_metric": 0.9470943315331984,
  "best_model_checkpoint": "/shared/3/projects/hiatus/tagged_data/models/roberta-base/binary-finetune-full/results/checkpoint-168317",
  "epoch": 1.760049774157839,
  "eval_steps": 9901,
  "global_step": 217822,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004040110214206643,
      "grad_norm": 0.24907590448856354,
      "learning_rate": 1.9989899724464485e-05,
      "loss": 0.3886,
      "step": 500
    },
    {
      "epoch": 0.008080220428413287,
      "grad_norm": 0.19640618562698364,
      "learning_rate": 1.997979944892897e-05,
      "loss": 0.2835,
      "step": 1000
    },
    {
      "epoch": 0.01212033064261993,
      "grad_norm": 0.21297627687454224,
      "learning_rate": 1.9969699173393452e-05,
      "loss": 0.2123,
      "step": 1500
    },
    {
      "epoch": 0.016160440856826573,
      "grad_norm": 0.1339714229106903,
      "learning_rate": 1.9959598897857935e-05,
      "loss": 0.1631,
      "step": 2000
    },
    {
      "epoch": 0.020200551071033218,
      "grad_norm": 0.20592452585697174,
      "learning_rate": 1.994949862232242e-05,
      "loss": 0.1306,
      "step": 2500
    },
    {
      "epoch": 0.02424066128523986,
      "grad_norm": 0.1734761744737625,
      "learning_rate": 1.9939398346786902e-05,
      "loss": 0.1134,
      "step": 3000
    },
    {
      "epoch": 0.028280771499446505,
      "grad_norm": 0.1429387331008911,
      "learning_rate": 1.9929298071251385e-05,
      "loss": 0.0954,
      "step": 3500
    },
    {
      "epoch": 0.032320881713653146,
      "grad_norm": 0.13530635833740234,
      "learning_rate": 1.991919779571587e-05,
      "loss": 0.0833,
      "step": 4000
    },
    {
      "epoch": 0.03636099192785979,
      "grad_norm": 0.16425204277038574,
      "learning_rate": 1.9909097520180352e-05,
      "loss": 0.0739,
      "step": 4500
    },
    {
      "epoch": 0.040401102142066436,
      "grad_norm": 0.15035004913806915,
      "learning_rate": 1.9898997244644835e-05,
      "loss": 0.0657,
      "step": 5000
    },
    {
      "epoch": 0.04444121235627308,
      "grad_norm": 0.1048048660159111,
      "learning_rate": 1.988889696910932e-05,
      "loss": 0.0606,
      "step": 5500
    },
    {
      "epoch": 0.04848132257047972,
      "grad_norm": 0.1167823076248169,
      "learning_rate": 1.9878796693573802e-05,
      "loss": 0.0555,
      "step": 6000
    },
    {
      "epoch": 0.052521432784686364,
      "grad_norm": 0.17904673516750336,
      "learning_rate": 1.9868696418038285e-05,
      "loss": 0.0517,
      "step": 6500
    },
    {
      "epoch": 0.05656154299889301,
      "grad_norm": 0.22022856771945953,
      "learning_rate": 1.985859614250277e-05,
      "loss": 0.0483,
      "step": 7000
    },
    {
      "epoch": 0.060601653213099654,
      "grad_norm": 0.11566773056983948,
      "learning_rate": 1.9848495866967252e-05,
      "loss": 0.0445,
      "step": 7500
    },
    {
      "epoch": 0.06464176342730629,
      "grad_norm": 0.09606140851974487,
      "learning_rate": 1.9838395591431736e-05,
      "loss": 0.0434,
      "step": 8000
    },
    {
      "epoch": 0.06868187364151294,
      "grad_norm": 0.14014209806919098,
      "learning_rate": 1.982829531589622e-05,
      "loss": 0.0415,
      "step": 8500
    },
    {
      "epoch": 0.07272198385571958,
      "grad_norm": 0.1238802894949913,
      "learning_rate": 1.9818195040360702e-05,
      "loss": 0.0401,
      "step": 9000
    },
    {
      "epoch": 0.07676209406992623,
      "grad_norm": 0.27284470200538635,
      "learning_rate": 1.9808094764825186e-05,
      "loss": 0.0375,
      "step": 9500
    },
    {
      "epoch": 0.08000226246171996,
      "eval_f1_macro": 0.8895664094153212,
      "eval_f1_micro": 0.9635283563255777,
      "eval_loss": 0.11441826820373535,
      "eval_precision_macro": 0.924616270515263,
      "eval_precision_micro": 0.9673416146866455,
      "eval_recall_macro": 0.8766278018110966,
      "eval_recall_micro": 0.9597450436296798,
      "eval_runtime": 13336.1892,
      "eval_samples_per_second": 2.97,
      "eval_steps_per_second": 0.012,
      "step": 9901
    },
    {
      "epoch": 0.08080220428413287,
      "grad_norm": 0.07111234217882156,
      "learning_rate": 1.979799448928967e-05,
      "loss": 0.037,
      "step": 10000
    },
    {
      "epoch": 0.08484231449833951,
      "grad_norm": 0.11708366125822067,
      "learning_rate": 1.9787894213754152e-05,
      "loss": 0.0337,
      "step": 10500
    },
    {
      "epoch": 0.08888242471254616,
      "grad_norm": 0.12418048083782196,
      "learning_rate": 1.9777793938218636e-05,
      "loss": 0.0331,
      "step": 11000
    },
    {
      "epoch": 0.0929225349267528,
      "grad_norm": 0.09599123895168304,
      "learning_rate": 1.976769366268312e-05,
      "loss": 0.032,
      "step": 11500
    },
    {
      "epoch": 0.09696264514095944,
      "grad_norm": 0.08350682258605957,
      "learning_rate": 1.9757593387147602e-05,
      "loss": 0.03,
      "step": 12000
    },
    {
      "epoch": 0.10100275535516609,
      "grad_norm": 0.1321333944797516,
      "learning_rate": 1.9747493111612086e-05,
      "loss": 0.03,
      "step": 12500
    },
    {
      "epoch": 0.10504286556937273,
      "grad_norm": 0.09571998566389084,
      "learning_rate": 1.973739283607657e-05,
      "loss": 0.0281,
      "step": 13000
    },
    {
      "epoch": 0.10908297578357938,
      "grad_norm": 0.11053865402936935,
      "learning_rate": 1.9727292560541053e-05,
      "loss": 0.0277,
      "step": 13500
    },
    {
      "epoch": 0.11312308599778602,
      "grad_norm": 0.06845366209745407,
      "learning_rate": 1.9717192285005536e-05,
      "loss": 0.0274,
      "step": 14000
    },
    {
      "epoch": 0.11716319621199266,
      "grad_norm": 0.11518129706382751,
      "learning_rate": 1.970709200947002e-05,
      "loss": 0.0267,
      "step": 14500
    },
    {
      "epoch": 0.12120330642619931,
      "grad_norm": 0.09419895708560944,
      "learning_rate": 1.9696991733934503e-05,
      "loss": 0.0259,
      "step": 15000
    },
    {
      "epoch": 0.12524341664040595,
      "grad_norm": 0.11040966212749481,
      "learning_rate": 1.9686891458398986e-05,
      "loss": 0.0258,
      "step": 15500
    },
    {
      "epoch": 0.12928352685461258,
      "grad_norm": 0.08434844762086868,
      "learning_rate": 1.967679118286347e-05,
      "loss": 0.0253,
      "step": 16000
    },
    {
      "epoch": 0.13332363706881925,
      "grad_norm": 0.13895830512046814,
      "learning_rate": 1.9666690907327953e-05,
      "loss": 0.0247,
      "step": 16500
    },
    {
      "epoch": 0.1373637472830259,
      "grad_norm": 0.10540761798620224,
      "learning_rate": 1.9656590631792436e-05,
      "loss": 0.0245,
      "step": 17000
    },
    {
      "epoch": 0.14140385749723253,
      "grad_norm": 0.09767123311758041,
      "learning_rate": 1.964649035625692e-05,
      "loss": 0.0245,
      "step": 17500
    },
    {
      "epoch": 0.14544396771143916,
      "grad_norm": 0.08917172998189926,
      "learning_rate": 1.9636390080721403e-05,
      "loss": 0.0251,
      "step": 18000
    },
    {
      "epoch": 0.1494840779256458,
      "grad_norm": 0.13313362002372742,
      "learning_rate": 1.9626289805185886e-05,
      "loss": 0.0252,
      "step": 18500
    },
    {
      "epoch": 0.15352418813985247,
      "grad_norm": 0.12236423790454865,
      "learning_rate": 1.961618952965037e-05,
      "loss": 0.0251,
      "step": 19000
    },
    {
      "epoch": 0.1575642983540591,
      "grad_norm": 0.09876661747694016,
      "learning_rate": 1.9606089254114853e-05,
      "loss": 0.0249,
      "step": 19500
    },
    {
      "epoch": 0.16000452492343992,
      "eval_f1_macro": 0.9035923074831559,
      "eval_f1_micro": 0.9606367859628728,
      "eval_loss": 0.11549682915210724,
      "eval_precision_macro": 0.9036435627053582,
      "eval_precision_micro": 0.9530018135782053,
      "eval_recall_macro": 0.9133421606323336,
      "eval_recall_micro": 0.9683950814868224,
      "eval_runtime": 15280.9037,
      "eval_samples_per_second": 2.592,
      "eval_steps_per_second": 0.01,
      "step": 19802
    },
    {
      "epoch": 0.16160440856826574,
      "grad_norm": 0.057935502380132675,
      "learning_rate": 1.9595988978579336e-05,
      "loss": 0.0246,
      "step": 20000
    },
    {
      "epoch": 0.16564451878247238,
      "grad_norm": 0.13269482553005219,
      "learning_rate": 1.958588870304382e-05,
      "loss": 0.0227,
      "step": 20500
    },
    {
      "epoch": 0.16968462899667902,
      "grad_norm": 0.09697619825601578,
      "learning_rate": 1.9575788427508303e-05,
      "loss": 0.0229,
      "step": 21000
    },
    {
      "epoch": 0.17372473921088566,
      "grad_norm": 0.2296031266450882,
      "learning_rate": 1.9565688151972786e-05,
      "loss": 0.0222,
      "step": 21500
    },
    {
      "epoch": 0.17776484942509233,
      "grad_norm": 0.07054860144853592,
      "learning_rate": 1.955558787643727e-05,
      "loss": 0.0217,
      "step": 22000
    },
    {
      "epoch": 0.18180495963929896,
      "grad_norm": 0.14594444632530212,
      "learning_rate": 1.9545487600901753e-05,
      "loss": 0.0216,
      "step": 22500
    },
    {
      "epoch": 0.1858450698535056,
      "grad_norm": 0.1951671838760376,
      "learning_rate": 1.9535387325366237e-05,
      "loss": 0.0236,
      "step": 23000
    },
    {
      "epoch": 0.18988518006771224,
      "grad_norm": 0.09014302492141724,
      "learning_rate": 1.952528704983072e-05,
      "loss": 0.0241,
      "step": 23500
    },
    {
      "epoch": 0.19392529028191888,
      "grad_norm": 0.07351396232843399,
      "learning_rate": 1.9515186774295207e-05,
      "loss": 0.0246,
      "step": 24000
    },
    {
      "epoch": 0.19796540049612554,
      "grad_norm": 0.13433478772640228,
      "learning_rate": 1.950508649875969e-05,
      "loss": 0.0237,
      "step": 24500
    },
    {
      "epoch": 0.20200551071033218,
      "grad_norm": 0.09406758099794388,
      "learning_rate": 1.949498622322417e-05,
      "loss": 0.0246,
      "step": 25000
    },
    {
      "epoch": 0.20604562092453882,
      "grad_norm": 0.0723891332745552,
      "learning_rate": 1.9484885947688653e-05,
      "loss": 0.0248,
      "step": 25500
    },
    {
      "epoch": 0.21008573113874546,
      "grad_norm": 0.0635887160897255,
      "learning_rate": 1.9474785672153137e-05,
      "loss": 0.0243,
      "step": 26000
    },
    {
      "epoch": 0.2141258413529521,
      "grad_norm": 0.12226919084787369,
      "learning_rate": 1.9464685396617624e-05,
      "loss": 0.0246,
      "step": 26500
    },
    {
      "epoch": 0.21816595156715876,
      "grad_norm": 0.17104922235012054,
      "learning_rate": 1.9454585121082107e-05,
      "loss": 0.0244,
      "step": 27000
    },
    {
      "epoch": 0.2222060617813654,
      "grad_norm": 0.07366731762886047,
      "learning_rate": 1.9444484845546587e-05,
      "loss": 0.0236,
      "step": 27500
    },
    {
      "epoch": 0.22624617199557204,
      "grad_norm": 0.05332706496119499,
      "learning_rate": 1.943438457001107e-05,
      "loss": 0.0191,
      "step": 28000
    },
    {
      "epoch": 0.23028628220977868,
      "grad_norm": 0.12188129127025604,
      "learning_rate": 1.9424284294475554e-05,
      "loss": 0.0184,
      "step": 28500
    },
    {
      "epoch": 0.2343263924239853,
      "grad_norm": 0.07722073048353195,
      "learning_rate": 1.941418401894004e-05,
      "loss": 0.018,
      "step": 29000
    },
    {
      "epoch": 0.23836650263819198,
      "grad_norm": 0.07803859561681747,
      "learning_rate": 1.9404083743404524e-05,
      "loss": 0.0176,
      "step": 29500
    },
    {
      "epoch": 0.24000678738515988,
      "eval_f1_macro": 0.9189758616039659,
      "eval_f1_micro": 0.9664452050510047,
      "eval_loss": 0.10743161290884018,
      "eval_precision_macro": 0.9454764579982075,
      "eval_precision_micro": 0.9680139971687907,
      "eval_recall_macro": 0.9051902291376027,
      "eval_recall_micro": 0.9648814895676969,
      "eval_runtime": 13586.8138,
      "eval_samples_per_second": 2.915,
      "eval_steps_per_second": 0.011,
      "step": 29703
    },
    {
      "epoch": 0.24240661285239862,
      "grad_norm": 0.04772321879863739,
      "learning_rate": 1.9393983467869004e-05,
      "loss": 0.0177,
      "step": 30000
    },
    {
      "epoch": 0.24644672306660526,
      "grad_norm": 0.11928682029247284,
      "learning_rate": 1.9383883192333487e-05,
      "loss": 0.0229,
      "step": 30500
    },
    {
      "epoch": 0.2504868332808119,
      "grad_norm": 0.08102133870124817,
      "learning_rate": 1.937378291679797e-05,
      "loss": 0.0228,
      "step": 31000
    },
    {
      "epoch": 0.25452694349501853,
      "grad_norm": 0.073371522128582,
      "learning_rate": 1.9363682641262457e-05,
      "loss": 0.0231,
      "step": 31500
    },
    {
      "epoch": 0.25856705370922517,
      "grad_norm": 0.08793163299560547,
      "learning_rate": 1.935358236572694e-05,
      "loss": 0.0229,
      "step": 32000
    },
    {
      "epoch": 0.2626071639234318,
      "grad_norm": 0.12065927684307098,
      "learning_rate": 1.934348209019142e-05,
      "loss": 0.0231,
      "step": 32500
    },
    {
      "epoch": 0.2666472741376385,
      "grad_norm": 0.10052382946014404,
      "learning_rate": 1.9333381814655904e-05,
      "loss": 0.0218,
      "step": 33000
    },
    {
      "epoch": 0.27068738435184514,
      "grad_norm": 0.08048272132873535,
      "learning_rate": 1.9323281539120387e-05,
      "loss": 0.0227,
      "step": 33500
    },
    {
      "epoch": 0.2747274945660518,
      "grad_norm": 0.05087564140558243,
      "learning_rate": 1.9313181263584874e-05,
      "loss": 0.0206,
      "step": 34000
    },
    {
      "epoch": 0.2787676047802584,
      "grad_norm": 0.13495181500911713,
      "learning_rate": 1.9303080988049357e-05,
      "loss": 0.0213,
      "step": 34500
    },
    {
      "epoch": 0.28280771499446505,
      "grad_norm": 0.104469895362854,
      "learning_rate": 1.9292980712513837e-05,
      "loss": 0.0215,
      "step": 35000
    },
    {
      "epoch": 0.2868478252086717,
      "grad_norm": 0.10830747336149216,
      "learning_rate": 1.928288043697832e-05,
      "loss": 0.0232,
      "step": 35500
    },
    {
      "epoch": 0.29088793542287833,
      "grad_norm": 0.07727912068367004,
      "learning_rate": 1.9272780161442804e-05,
      "loss": 0.0239,
      "step": 36000
    },
    {
      "epoch": 0.29492804563708497,
      "grad_norm": 0.11781858652830124,
      "learning_rate": 1.926267988590729e-05,
      "loss": 0.0229,
      "step": 36500
    },
    {
      "epoch": 0.2989681558512916,
      "grad_norm": 0.09465543925762177,
      "learning_rate": 1.9252579610371774e-05,
      "loss": 0.0241,
      "step": 37000
    },
    {
      "epoch": 0.30300826606549824,
      "grad_norm": 0.08024444431066513,
      "learning_rate": 1.9242479334836254e-05,
      "loss": 0.0236,
      "step": 37500
    },
    {
      "epoch": 0.30704837627970494,
      "grad_norm": 0.06720072776079178,
      "learning_rate": 1.9232379059300738e-05,
      "loss": 0.0211,
      "step": 38000
    },
    {
      "epoch": 0.3110884864939116,
      "grad_norm": 0.13362745940685272,
      "learning_rate": 1.922227878376522e-05,
      "loss": 0.0203,
      "step": 38500
    },
    {
      "epoch": 0.3151285967081182,
      "grad_norm": 0.09114370495080948,
      "learning_rate": 1.9212178508229708e-05,
      "loss": 0.0203,
      "step": 39000
    },
    {
      "epoch": 0.31916870692232485,
      "grad_norm": 0.07381443679332733,
      "learning_rate": 1.920207823269419e-05,
      "loss": 0.0206,
      "step": 39500
    },
    {
      "epoch": 0.32000904984687983,
      "eval_f1_macro": 0.9235874471417507,
      "eval_f1_micro": 0.9663972411435532,
      "eval_loss": 0.10308045893907547,
      "eval_precision_macro": 0.9426979961917242,
      "eval_precision_micro": 0.9680776634557233,
      "eval_recall_macro": 0.9108391105037915,
      "eval_recall_micro": 0.9647226425913261,
      "eval_runtime": 13873.9388,
      "eval_samples_per_second": 2.854,
      "eval_steps_per_second": 0.011,
      "step": 39604
    },
    {
      "epoch": 0.3232088171365315,
      "grad_norm": 0.07686352729797363,
      "learning_rate": 1.9191977957158674e-05,
      "loss": 0.0204,
      "step": 40000
    },
    {
      "epoch": 0.32724892735073813,
      "grad_norm": 0.15459179878234863,
      "learning_rate": 1.9181877681623154e-05,
      "loss": 0.0225,
      "step": 40500
    },
    {
      "epoch": 0.33128903756494477,
      "grad_norm": 0.11474985629320145,
      "learning_rate": 1.9171777406087638e-05,
      "loss": 0.0229,
      "step": 41000
    },
    {
      "epoch": 0.3353291477791514,
      "grad_norm": 0.09817365556955338,
      "learning_rate": 1.9161677130552125e-05,
      "loss": 0.0229,
      "step": 41500
    },
    {
      "epoch": 0.33936925799335804,
      "grad_norm": 0.07288320362567902,
      "learning_rate": 1.9151576855016608e-05,
      "loss": 0.0222,
      "step": 42000
    },
    {
      "epoch": 0.3434093682075647,
      "grad_norm": 0.13258545100688934,
      "learning_rate": 1.914147657948109e-05,
      "loss": 0.0221,
      "step": 42500
    },
    {
      "epoch": 0.3474494784217713,
      "grad_norm": 0.09609493613243103,
      "learning_rate": 1.913137630394557e-05,
      "loss": 0.0224,
      "step": 43000
    },
    {
      "epoch": 0.351489588635978,
      "grad_norm": 0.0800900012254715,
      "learning_rate": 1.9121276028410055e-05,
      "loss": 0.022,
      "step": 43500
    },
    {
      "epoch": 0.35552969885018465,
      "grad_norm": 0.06640051305294037,
      "learning_rate": 1.911117575287454e-05,
      "loss": 0.0209,
      "step": 44000
    },
    {
      "epoch": 0.3595698090643913,
      "grad_norm": 0.13987226784229279,
      "learning_rate": 1.9101075477339025e-05,
      "loss": 0.022,
      "step": 44500
    },
    {
      "epoch": 0.3636099192785979,
      "grad_norm": 0.08626226335763931,
      "learning_rate": 1.9090975201803508e-05,
      "loss": 0.0219,
      "step": 45000
    },
    {
      "epoch": 0.36765002949280456,
      "grad_norm": 0.09093815088272095,
      "learning_rate": 1.9080874926267988e-05,
      "loss": 0.0191,
      "step": 45500
    },
    {
      "epoch": 0.3716901397070112,
      "grad_norm": 0.062450163066387177,
      "learning_rate": 1.907077465073247e-05,
      "loss": 0.0192,
      "step": 46000
    },
    {
      "epoch": 0.37573024992121784,
      "grad_norm": 0.14638446271419525,
      "learning_rate": 1.9060674375196958e-05,
      "loss": 0.0186,
      "step": 46500
    },
    {
      "epoch": 0.3797703601354245,
      "grad_norm": 0.08730041235685349,
      "learning_rate": 1.905057409966144e-05,
      "loss": 0.019,
      "step": 47000
    },
    {
      "epoch": 0.3838104703496311,
      "grad_norm": 0.09185372292995453,
      "learning_rate": 1.9040473824125925e-05,
      "loss": 0.0189,
      "step": 47500
    },
    {
      "epoch": 0.38785058056383775,
      "grad_norm": 0.05995471775531769,
      "learning_rate": 1.9030373548590405e-05,
      "loss": 0.0174,
      "step": 48000
    },
    {
      "epoch": 0.39189069077804445,
      "grad_norm": 0.14513157308101654,
      "learning_rate": 1.902027327305489e-05,
      "loss": 0.0167,
      "step": 48500
    },
    {
      "epoch": 0.3959308009922511,
      "grad_norm": 0.08400790393352509,
      "learning_rate": 1.9010172997519375e-05,
      "loss": 0.0165,
      "step": 49000
    },
    {
      "epoch": 0.3999709112064577,
      "grad_norm": 0.0705028846859932,
      "learning_rate": 1.900007272198386e-05,
      "loss": 0.0165,
      "step": 49500
    },
    {
      "epoch": 0.40001131230859976,
      "eval_f1_macro": 0.9281014680466918,
      "eval_f1_micro": 0.9684054719516875,
      "eval_loss": 0.10883225500583649,
      "eval_precision_macro": 0.9609447042869569,
      "eval_precision_micro": 0.97977646274136,
      "eval_recall_macro": 0.9023603199352568,
      "eval_recall_micro": 0.9572953897303177,
      "eval_runtime": 14645.8089,
      "eval_samples_per_second": 2.704,
      "eval_steps_per_second": 0.011,
      "step": 49505
    },
    {
      "epoch": 0.40401102142066436,
      "grad_norm": 0.06206486374139786,
      "learning_rate": 1.8989972446448342e-05,
      "loss": 0.0163,
      "step": 50000
    },
    {
      "epoch": 0.408051131634871,
      "grad_norm": 0.13632065057754517,
      "learning_rate": 1.8979872170912822e-05,
      "loss": 0.0175,
      "step": 50500
    },
    {
      "epoch": 0.41209124184907764,
      "grad_norm": 0.10581111907958984,
      "learning_rate": 1.896977189537731e-05,
      "loss": 0.0179,
      "step": 51000
    },
    {
      "epoch": 0.4161313520632843,
      "grad_norm": 0.05609723553061485,
      "learning_rate": 1.8959671619841792e-05,
      "loss": 0.0171,
      "step": 51500
    },
    {
      "epoch": 0.4201714622774909,
      "grad_norm": 0.0569671131670475,
      "learning_rate": 1.8949571344306275e-05,
      "loss": 0.0177,
      "step": 52000
    },
    {
      "epoch": 0.42421157249169755,
      "grad_norm": 0.12548725306987762,
      "learning_rate": 1.893947106877076e-05,
      "loss": 0.0179,
      "step": 52500
    },
    {
      "epoch": 0.4282516827059042,
      "grad_norm": 0.14123043417930603,
      "learning_rate": 1.892937079323524e-05,
      "loss": 0.0195,
      "step": 53000
    },
    {
      "epoch": 0.4322917929201109,
      "grad_norm": 0.07868105173110962,
      "learning_rate": 1.8919270517699725e-05,
      "loss": 0.0195,
      "step": 53500
    },
    {
      "epoch": 0.4363319031343175,
      "grad_norm": 0.0551162026822567,
      "learning_rate": 1.890917024216421e-05,
      "loss": 0.0194,
      "step": 54000
    },
    {
      "epoch": 0.44037201334852416,
      "grad_norm": 0.12377525120973587,
      "learning_rate": 1.8899069966628692e-05,
      "loss": 0.0193,
      "step": 54500
    },
    {
      "epoch": 0.4444121235627308,
      "grad_norm": 0.07947281748056412,
      "learning_rate": 1.8888969691093175e-05,
      "loss": 0.0195,
      "step": 55000
    },
    {
      "epoch": 0.44845223377693744,
      "grad_norm": 0.07180605828762054,
      "learning_rate": 1.887886941555766e-05,
      "loss": 0.0186,
      "step": 55500
    },
    {
      "epoch": 0.4524923439911441,
      "grad_norm": 0.0590415820479393,
      "learning_rate": 1.8868769140022142e-05,
      "loss": 0.0186,
      "step": 56000
    },
    {
      "epoch": 0.4565324542053507,
      "grad_norm": 0.12405771017074585,
      "learning_rate": 1.8858668864486626e-05,
      "loss": 0.0181,
      "step": 56500
    },
    {
      "epoch": 0.46057256441955735,
      "grad_norm": 0.09074413031339645,
      "learning_rate": 1.884856858895111e-05,
      "loss": 0.0178,
      "step": 57000
    },
    {
      "epoch": 0.464612674633764,
      "grad_norm": 0.12590628862380981,
      "learning_rate": 1.8838468313415592e-05,
      "loss": 0.0178,
      "step": 57500
    },
    {
      "epoch": 0.4686527848479706,
      "grad_norm": 0.047191109508275986,
      "learning_rate": 1.8828368037880076e-05,
      "loss": 0.0154,
      "step": 58000
    },
    {
      "epoch": 0.4726928950621773,
      "grad_norm": 0.13741852343082428,
      "learning_rate": 1.881826776234456e-05,
      "loss": 0.0156,
      "step": 58500
    },
    {
      "epoch": 0.47673300527638396,
      "grad_norm": 0.07473180443048477,
      "learning_rate": 1.8808167486809042e-05,
      "loss": 0.0152,
      "step": 59000
    },
    {
      "epoch": 0.48001357477031975,
      "eval_f1_macro": 0.9238212336154783,
      "eval_f1_micro": 0.9658474370811376,
      "eval_loss": 0.10412032902240753,
      "eval_precision_macro": 0.9360090910956397,
      "eval_precision_micro": 0.9629420110715917,
      "eval_recall_macro": 0.9167736650129396,
      "eval_recall_micro": 0.9687704488794511,
      "eval_runtime": 13864.7476,
      "eval_samples_per_second": 2.856,
      "eval_steps_per_second": 0.011,
      "step": 59406
    },
    {
      "epoch": 0.4807731154905906,
      "grad_norm": 0.06410785764455795,
      "learning_rate": 1.8798067211273526e-05,
      "loss": 0.0147,
      "step": 59500
    },
    {
      "epoch": 0.48481322570479723,
      "grad_norm": 0.05010313540697098,
      "learning_rate": 1.878796693573801e-05,
      "loss": 0.0156,
      "step": 60000
    },
    {
      "epoch": 0.4888533359190039,
      "grad_norm": 0.14338257908821106,
      "learning_rate": 1.8777866660202493e-05,
      "loss": 0.0164,
      "step": 60500
    },
    {
      "epoch": 0.4928934461332105,
      "grad_norm": 0.09123385697603226,
      "learning_rate": 1.8767766384666976e-05,
      "loss": 0.0174,
      "step": 61000
    },
    {
      "epoch": 0.49693355634741715,
      "grad_norm": 0.07728511840105057,
      "learning_rate": 1.875766610913146e-05,
      "loss": 0.017,
      "step": 61500
    },
    {
      "epoch": 0.5009736665616238,
      "grad_norm": 0.06151897832751274,
      "learning_rate": 1.8747565833595943e-05,
      "loss": 0.0172,
      "step": 62000
    },
    {
      "epoch": 0.5050137767758305,
      "grad_norm": 0.14278863370418549,
      "learning_rate": 1.8737465558060426e-05,
      "loss": 0.0166,
      "step": 62500
    },
    {
      "epoch": 0.5090538869900371,
      "grad_norm": 0.08395873010158539,
      "learning_rate": 1.872736528252491e-05,
      "loss": 0.0206,
      "step": 63000
    },
    {
      "epoch": 0.5130939972042438,
      "grad_norm": 0.09704262018203735,
      "learning_rate": 1.8717265006989393e-05,
      "loss": 0.0208,
      "step": 63500
    },
    {
      "epoch": 0.5171341074184503,
      "grad_norm": 0.06397638469934464,
      "learning_rate": 1.8707164731453876e-05,
      "loss": 0.0208,
      "step": 64000
    },
    {
      "epoch": 0.521174217632657,
      "grad_norm": 0.1525479257106781,
      "learning_rate": 1.869706445591836e-05,
      "loss": 0.0207,
      "step": 64500
    },
    {
      "epoch": 0.5252143278468636,
      "grad_norm": 0.0878639966249466,
      "learning_rate": 1.8686964180382843e-05,
      "loss": 0.0198,
      "step": 65000
    },
    {
      "epoch": 0.5292544380610703,
      "grad_norm": 0.05913593992590904,
      "learning_rate": 1.8676863904847326e-05,
      "loss": 0.0166,
      "step": 65500
    },
    {
      "epoch": 0.533294548275277,
      "grad_norm": 0.05049494653940201,
      "learning_rate": 1.866676362931181e-05,
      "loss": 0.0166,
      "step": 66000
    },
    {
      "epoch": 0.5373346584894836,
      "grad_norm": 0.10428164154291153,
      "learning_rate": 1.8656663353776293e-05,
      "loss": 0.0174,
      "step": 66500
    },
    {
      "epoch": 0.5413747687036903,
      "grad_norm": 0.08380962908267975,
      "learning_rate": 1.8646563078240776e-05,
      "loss": 0.0165,
      "step": 67000
    },
    {
      "epoch": 0.5454148789178969,
      "grad_norm": 0.12970462441444397,
      "learning_rate": 1.863646280270526e-05,
      "loss": 0.0164,
      "step": 67500
    },
    {
      "epoch": 0.5494549891321036,
      "grad_norm": 0.12594661116600037,
      "learning_rate": 1.8626362527169743e-05,
      "loss": 0.0189,
      "step": 68000
    },
    {
      "epoch": 0.5534950993463101,
      "grad_norm": 0.11368534713983536,
      "learning_rate": 1.8616262251634226e-05,
      "loss": 0.019,
      "step": 68500
    },
    {
      "epoch": 0.5575352095605168,
      "grad_norm": 0.08376836031675339,
      "learning_rate": 1.860616197609871e-05,
      "loss": 0.0191,
      "step": 69000
    },
    {
      "epoch": 0.5600158372320397,
      "eval_f1_macro": 0.9256130258855139,
      "eval_f1_micro": 0.9668266025133863,
      "eval_loss": 0.10856343805789948,
      "eval_precision_macro": 0.9437511235105461,
      "eval_precision_micro": 0.9695209969914294,
      "eval_recall_macro": 0.9124750251974688,
      "eval_recall_micro": 0.9641471425069983,
      "eval_runtime": 14189.8384,
      "eval_samples_per_second": 2.791,
      "eval_steps_per_second": 0.011,
      "step": 69307
    },
    {
      "epoch": 0.5615753197747234,
      "grad_norm": 0.08000296354293823,
      "learning_rate": 1.8596061700563193e-05,
      "loss": 0.0188,
      "step": 69500
    },
    {
      "epoch": 0.5656154299889301,
      "grad_norm": 0.06347772479057312,
      "learning_rate": 1.8585961425027677e-05,
      "loss": 0.0187,
      "step": 70000
    },
    {
      "epoch": 0.5696555402031367,
      "grad_norm": 0.21189579367637634,
      "learning_rate": 1.857586114949216e-05,
      "loss": 0.0196,
      "step": 70500
    },
    {
      "epoch": 0.5736956504173434,
      "grad_norm": 0.07940568774938583,
      "learning_rate": 1.8565760873956643e-05,
      "loss": 0.0196,
      "step": 71000
    },
    {
      "epoch": 0.5777357606315501,
      "grad_norm": 0.07458707690238953,
      "learning_rate": 1.8555660598421127e-05,
      "loss": 0.0199,
      "step": 71500
    },
    {
      "epoch": 0.5817758708457567,
      "grad_norm": 0.0705709308385849,
      "learning_rate": 1.854556032288561e-05,
      "loss": 0.0193,
      "step": 72000
    },
    {
      "epoch": 0.5858159810599634,
      "grad_norm": 0.13246993720531464,
      "learning_rate": 1.8535460047350093e-05,
      "loss": 0.0195,
      "step": 72500
    },
    {
      "epoch": 0.5898560912741699,
      "grad_norm": 0.08721259236335754,
      "learning_rate": 1.8525359771814577e-05,
      "loss": 0.0196,
      "step": 73000
    },
    {
      "epoch": 0.5938962014883766,
      "grad_norm": 0.07570379972457886,
      "learning_rate": 1.851525949627906e-05,
      "loss": 0.0186,
      "step": 73500
    },
    {
      "epoch": 0.5979363117025832,
      "grad_norm": 0.07477313280105591,
      "learning_rate": 1.8505159220743543e-05,
      "loss": 0.0183,
      "step": 74000
    },
    {
      "epoch": 0.6019764219167899,
      "grad_norm": 0.15558893978595734,
      "learning_rate": 1.8495058945208027e-05,
      "loss": 0.0194,
      "step": 74500
    },
    {
      "epoch": 0.6060165321309965,
      "grad_norm": 0.08373390883207321,
      "learning_rate": 1.848495866967251e-05,
      "loss": 0.0189,
      "step": 75000
    },
    {
      "epoch": 0.6100566423452032,
      "grad_norm": 0.06340883672237396,
      "learning_rate": 1.8474858394136994e-05,
      "loss": 0.0139,
      "step": 75500
    },
    {
      "epoch": 0.6140967525594099,
      "grad_norm": 0.05438007041811943,
      "learning_rate": 1.8464758118601477e-05,
      "loss": 0.0148,
      "step": 76000
    },
    {
      "epoch": 0.6181368627736165,
      "grad_norm": 0.1218661442399025,
      "learning_rate": 1.845465784306596e-05,
      "loss": 0.0151,
      "step": 76500
    },
    {
      "epoch": 0.6221769729878232,
      "grad_norm": 0.0688873752951622,
      "learning_rate": 1.8444557567530444e-05,
      "loss": 0.0143,
      "step": 77000
    },
    {
      "epoch": 0.6262170832020297,
      "grad_norm": 0.058265481144189835,
      "learning_rate": 1.8434457291994927e-05,
      "loss": 0.0142,
      "step": 77500
    },
    {
      "epoch": 0.6302571934162364,
      "grad_norm": 0.046319037675857544,
      "learning_rate": 1.842435701645941e-05,
      "loss": 0.0142,
      "step": 78000
    },
    {
      "epoch": 0.634297303630443,
      "grad_norm": 0.14100997149944305,
      "learning_rate": 1.8414256740923894e-05,
      "loss": 0.0146,
      "step": 78500
    },
    {
      "epoch": 0.6383374138446497,
      "grad_norm": 0.10154972225427628,
      "learning_rate": 1.8404156465388377e-05,
      "loss": 0.0146,
      "step": 79000
    },
    {
      "epoch": 0.6400180996937597,
      "eval_f1_macro": 0.9268223044157705,
      "eval_f1_micro": 0.9675749211491975,
      "eval_loss": 0.1128077358007431,
      "eval_precision_macro": 0.9500253922065015,
      "eval_precision_micro": 0.9728413004763068,
      "eval_recall_macro": 0.9094651623357265,
      "eval_recall_micro": 0.962365252860739,
      "eval_runtime": 13980.4006,
      "eval_samples_per_second": 2.833,
      "eval_steps_per_second": 0.011,
      "step": 79208
    },
    {
      "epoch": 0.6423775240588563,
      "grad_norm": 0.1044822484254837,
      "learning_rate": 1.839405618985286e-05,
      "loss": 0.0142,
      "step": 79500
    },
    {
      "epoch": 0.646417634273063,
      "grad_norm": 0.061170101165771484,
      "learning_rate": 1.8383955914317344e-05,
      "loss": 0.0142,
      "step": 80000
    },
    {
      "epoch": 0.6504577444872696,
      "grad_norm": 0.09872958064079285,
      "learning_rate": 1.8373855638781827e-05,
      "loss": 0.0174,
      "step": 80500
    },
    {
      "epoch": 0.6544978547014763,
      "grad_norm": 0.08190814405679703,
      "learning_rate": 1.836375536324631e-05,
      "loss": 0.016,
      "step": 81000
    },
    {
      "epoch": 0.658537964915683,
      "grad_norm": 0.07712013274431229,
      "learning_rate": 1.8353655087710794e-05,
      "loss": 0.0172,
      "step": 81500
    },
    {
      "epoch": 0.6625780751298895,
      "grad_norm": 0.04823287948966026,
      "learning_rate": 1.8343554812175277e-05,
      "loss": 0.0168,
      "step": 82000
    },
    {
      "epoch": 0.6666181853440962,
      "grad_norm": 0.11726228892803192,
      "learning_rate": 1.833345453663976e-05,
      "loss": 0.017,
      "step": 82500
    },
    {
      "epoch": 0.6706582955583028,
      "grad_norm": 0.06535898894071579,
      "learning_rate": 1.8323354261104244e-05,
      "loss": 0.016,
      "step": 83000
    },
    {
      "epoch": 0.6746984057725095,
      "grad_norm": 0.05892045795917511,
      "learning_rate": 1.8313253985568727e-05,
      "loss": 0.0159,
      "step": 83500
    },
    {
      "epoch": 0.6787385159867161,
      "grad_norm": 0.04444234445691109,
      "learning_rate": 1.830315371003321e-05,
      "loss": 0.0153,
      "step": 84000
    },
    {
      "epoch": 0.6827786262009228,
      "grad_norm": 0.1465209275484085,
      "learning_rate": 1.8293053434497694e-05,
      "loss": 0.0156,
      "step": 84500
    },
    {
      "epoch": 0.6868187364151294,
      "grad_norm": 0.11835352331399918,
      "learning_rate": 1.8282953158962178e-05,
      "loss": 0.0154,
      "step": 85000
    },
    {
      "epoch": 0.690858846629336,
      "grad_norm": 0.05793392285704613,
      "learning_rate": 1.827285288342666e-05,
      "loss": 0.0138,
      "step": 85500
    },
    {
      "epoch": 0.6948989568435426,
      "grad_norm": 0.045407455414533615,
      "learning_rate": 1.8262752607891144e-05,
      "loss": 0.0133,
      "step": 86000
    },
    {
      "epoch": 0.6989390670577493,
      "grad_norm": 0.12997862696647644,
      "learning_rate": 1.8252652332355628e-05,
      "loss": 0.0141,
      "step": 86500
    },
    {
      "epoch": 0.702979177271956,
      "grad_norm": 0.07040946930646896,
      "learning_rate": 1.824255205682011e-05,
      "loss": 0.0138,
      "step": 87000
    },
    {
      "epoch": 0.7070192874861626,
      "grad_norm": 0.05935658514499664,
      "learning_rate": 1.8232451781284594e-05,
      "loss": 0.0144,
      "step": 87500
    },
    {
      "epoch": 0.7110593977003693,
      "grad_norm": 0.0425080843269825,
      "learning_rate": 1.8222351505749078e-05,
      "loss": 0.018,
      "step": 88000
    },
    {
      "epoch": 0.7150995079145759,
      "grad_norm": 0.1149262934923172,
      "learning_rate": 1.821225123021356e-05,
      "loss": 0.0177,
      "step": 88500
    },
    {
      "epoch": 0.7191396181287826,
      "grad_norm": 0.08022065460681915,
      "learning_rate": 1.8202150954678045e-05,
      "loss": 0.0172,
      "step": 89000
    },
    {
      "epoch": 0.7200203621554796,
      "eval_f1_macro": 0.9342654788805338,
      "eval_f1_micro": 0.9723695526241013,
      "eval_loss": 0.09522199630737305,
      "eval_precision_macro": 0.9684740304119624,
      "eval_precision_micro": 0.9878875975715066,
      "eval_recall_macro": 0.9056287281769387,
      "eval_recall_micro": 0.9573314924580955,
      "eval_runtime": 13779.2013,
      "eval_samples_per_second": 2.874,
      "eval_steps_per_second": 0.011,
      "step": 89109
    },
    {
      "epoch": 0.7231797283429892,
      "grad_norm": 0.06360196322202682,
      "learning_rate": 1.8192050679142528e-05,
      "loss": 0.0167,
      "step": 89500
    },
    {
      "epoch": 0.7272198385571959,
      "grad_norm": 0.0733686089515686,
      "learning_rate": 1.818195040360701e-05,
      "loss": 0.0179,
      "step": 90000
    },
    {
      "epoch": 0.7312599487714024,
      "grad_norm": 0.1344570368528366,
      "learning_rate": 1.8171850128071495e-05,
      "loss": 0.0256,
      "step": 90500
    },
    {
      "epoch": 0.7353000589856091,
      "grad_norm": 0.0946430116891861,
      "learning_rate": 1.8161749852535978e-05,
      "loss": 0.026,
      "step": 91000
    },
    {
      "epoch": 0.7393401691998158,
      "grad_norm": 0.07514828443527222,
      "learning_rate": 1.815164957700046e-05,
      "loss": 0.0251,
      "step": 91500
    },
    {
      "epoch": 0.7433802794140224,
      "grad_norm": 0.06544400006532669,
      "learning_rate": 1.8141549301464945e-05,
      "loss": 0.0247,
      "step": 92000
    },
    {
      "epoch": 0.7474203896282291,
      "grad_norm": 0.11973392963409424,
      "learning_rate": 1.8131449025929428e-05,
      "loss": 0.0242,
      "step": 92500
    },
    {
      "epoch": 0.7514604998424357,
      "grad_norm": 0.07870098203420639,
      "learning_rate": 1.812134875039391e-05,
      "loss": 0.0197,
      "step": 93000
    },
    {
      "epoch": 0.7555006100566424,
      "grad_norm": 0.06315948814153671,
      "learning_rate": 1.8111248474858395e-05,
      "loss": 0.0189,
      "step": 93500
    },
    {
      "epoch": 0.759540720270849,
      "grad_norm": 0.05281440541148186,
      "learning_rate": 1.810114819932288e-05,
      "loss": 0.0183,
      "step": 94000
    },
    {
      "epoch": 0.7635808304850557,
      "grad_norm": 0.11212711036205292,
      "learning_rate": 1.809104792378736e-05,
      "loss": 0.0189,
      "step": 94500
    },
    {
      "epoch": 0.7676209406992622,
      "grad_norm": 0.13350194692611694,
      "learning_rate": 1.8080947648251845e-05,
      "loss": 0.019,
      "step": 95000
    },
    {
      "epoch": 0.7716610509134689,
      "grad_norm": 0.06391710788011551,
      "learning_rate": 1.8070847372716328e-05,
      "loss": 0.0176,
      "step": 95500
    },
    {
      "epoch": 0.7757011611276755,
      "grad_norm": 0.06272578239440918,
      "learning_rate": 1.806074709718081e-05,
      "loss": 0.0175,
      "step": 96000
    },
    {
      "epoch": 0.7797412713418822,
      "grad_norm": 0.10559968650341034,
      "learning_rate": 1.80506468216453e-05,
      "loss": 0.0166,
      "step": 96500
    },
    {
      "epoch": 0.7837813815560889,
      "grad_norm": 0.10264132171869278,
      "learning_rate": 1.804054654610978e-05,
      "loss": 0.0167,
      "step": 97000
    },
    {
      "epoch": 0.7878214917702955,
      "grad_norm": 0.06299474835395813,
      "learning_rate": 1.8030446270574262e-05,
      "loss": 0.0171,
      "step": 97500
    },
    {
      "epoch": 0.7918616019845022,
      "grad_norm": 0.052749671041965485,
      "learning_rate": 1.8020345995038745e-05,
      "loss": 0.0182,
      "step": 98000
    },
    {
      "epoch": 0.7959017121987088,
      "grad_norm": 0.14064335823059082,
      "learning_rate": 1.801024571950323e-05,
      "loss": 0.0186,
      "step": 98500
    },
    {
      "epoch": 0.7999418224129154,
      "grad_norm": 0.1007775291800499,
      "learning_rate": 1.8000145443967715e-05,
      "loss": 0.0183,
      "step": 99000
    },
    {
      "epoch": 0.8000226246171995,
      "eval_f1_macro": 0.9428325512898811,
      "eval_f1_micro": 0.9730330895777283,
      "eval_loss": 0.08679112046957016,
      "eval_precision_macro": 0.9804326754765803,
      "eval_precision_micro": 0.9888949040358451,
      "eval_recall_macro": 0.9118424578887557,
      "eval_recall_micro": 0.957672087213271,
      "eval_runtime": 13633.7127,
      "eval_samples_per_second": 2.905,
      "eval_steps_per_second": 0.011,
      "step": 99010
    },
    {
      "epoch": 0.803981932627122,
      "grad_norm": 0.06079207360744476,
      "learning_rate": 1.7990045168432195e-05,
      "loss": 0.0179,
      "step": 99500
    },
    {
      "epoch": 0.8080220428413287,
      "grad_norm": 0.08171634376049042,
      "learning_rate": 1.797994489289668e-05,
      "loss": 0.0174,
      "step": 100000
    },
    {
      "epoch": 0.8120621530555353,
      "grad_norm": 0.11801985651254654,
      "learning_rate": 1.7969844617361162e-05,
      "loss": 0.018,
      "step": 100500
    },
    {
      "epoch": 0.816102263269742,
      "grad_norm": 0.07442731410264969,
      "learning_rate": 1.7959744341825645e-05,
      "loss": 0.0193,
      "step": 101000
    },
    {
      "epoch": 0.8201423734839487,
      "grad_norm": 0.07479513436555862,
      "learning_rate": 1.7949644066290132e-05,
      "loss": 0.0182,
      "step": 101500
    },
    {
      "epoch": 0.8241824836981553,
      "grad_norm": 0.07510875165462494,
      "learning_rate": 1.7939543790754612e-05,
      "loss": 0.0187,
      "step": 102000
    },
    {
      "epoch": 0.828222593912362,
      "grad_norm": 0.12816323339939117,
      "learning_rate": 1.7929443515219095e-05,
      "loss": 0.018,
      "step": 102500
    },
    {
      "epoch": 0.8322627041265686,
      "grad_norm": 0.1283213347196579,
      "learning_rate": 1.791934323968358e-05,
      "loss": 0.017,
      "step": 103000
    },
    {
      "epoch": 0.8363028143407752,
      "grad_norm": 0.06121571362018585,
      "learning_rate": 1.7909242964148062e-05,
      "loss": 0.0169,
      "step": 103500
    },
    {
      "epoch": 0.8403429245549818,
      "grad_norm": 0.05697647109627724,
      "learning_rate": 1.789914268861255e-05,
      "loss": 0.0165,
      "step": 104000
    },
    {
      "epoch": 0.8443830347691885,
      "grad_norm": 0.12682537734508514,
      "learning_rate": 1.7889042413077032e-05,
      "loss": 0.0166,
      "step": 104500
    },
    {
      "epoch": 0.8484231449833951,
      "grad_norm": 0.0857871025800705,
      "learning_rate": 1.7878942137541512e-05,
      "loss": 0.0173,
      "step": 105000
    },
    {
      "epoch": 0.8524632551976018,
      "grad_norm": 0.06892874091863632,
      "learning_rate": 1.7868841862005996e-05,
      "loss": 0.0171,
      "step": 105500
    },
    {
      "epoch": 0.8565033654118084,
      "grad_norm": 0.04709647595882416,
      "learning_rate": 1.785874158647048e-05,
      "loss": 0.0159,
      "step": 106000
    },
    {
      "epoch": 0.8605434756260151,
      "grad_norm": 0.10291819274425507,
      "learning_rate": 1.7848641310934966e-05,
      "loss": 0.0165,
      "step": 106500
    },
    {
      "epoch": 0.8645835858402218,
      "grad_norm": 0.0879896804690361,
      "learning_rate": 1.783854103539945e-05,
      "loss": 0.0162,
      "step": 107000
    },
    {
      "epoch": 0.8686236960544284,
      "grad_norm": 0.06169717013835907,
      "learning_rate": 1.782844075986393e-05,
      "loss": 0.0158,
      "step": 107500
    },
    {
      "epoch": 0.872663806268635,
      "grad_norm": 0.05489352345466614,
      "learning_rate": 1.7818340484328413e-05,
      "loss": 0.0165,
      "step": 108000
    },
    {
      "epoch": 0.8767039164828416,
      "grad_norm": 0.14040745794773102,
      "learning_rate": 1.7808240208792896e-05,
      "loss": 0.0172,
      "step": 108500
    },
    {
      "epoch": 0.8800248870789195,
      "eval_f1_macro": 0.9452519445016722,
      "eval_f1_micro": 0.9735904566562644,
      "eval_loss": 0.09658095985651016,
      "eval_precision_macro": 0.9848024256701463,
      "eval_precision_micro": 0.9916469998618024,
      "eval_recall_macro": 0.9121362312839789,
      "eval_recall_micro": 0.956179724302381,
      "eval_runtime": 13866.6963,
      "eval_samples_per_second": 2.856,
      "eval_steps_per_second": 0.011,
      "step": 108911
    },
    {
      "epoch": 0.8807440266970483,
      "grad_norm": 0.10508357733488083,
      "learning_rate": 1.7798139933257383e-05,
      "loss": 0.0162,
      "step": 109000
    },
    {
      "epoch": 0.8847841369112549,
      "grad_norm": 0.06252790987491608,
      "learning_rate": 1.7788039657721866e-05,
      "loss": 0.0164,
      "step": 109500
    },
    {
      "epoch": 0.8888242471254616,
      "grad_norm": 0.04974674433469772,
      "learning_rate": 1.7777939382186346e-05,
      "loss": 0.0165,
      "step": 110000
    },
    {
      "epoch": 0.8928643573396682,
      "grad_norm": 0.11918849498033524,
      "learning_rate": 1.776783910665083e-05,
      "loss": 0.0174,
      "step": 110500
    },
    {
      "epoch": 0.8969044675538749,
      "grad_norm": 0.12928660213947296,
      "learning_rate": 1.7757738831115313e-05,
      "loss": 0.017,
      "step": 111000
    },
    {
      "epoch": 0.9009445777680816,
      "grad_norm": 0.06852889806032181,
      "learning_rate": 1.77476385555798e-05,
      "loss": 0.0163,
      "step": 111500
    },
    {
      "epoch": 0.9049846879822881,
      "grad_norm": 0.0549907386302948,
      "learning_rate": 1.7737538280044283e-05,
      "loss": 0.0173,
      "step": 112000
    },
    {
      "epoch": 0.9090247981964948,
      "grad_norm": 0.12298522889614105,
      "learning_rate": 1.7727438004508763e-05,
      "loss": 0.0169,
      "step": 112500
    },
    {
      "epoch": 0.9130649084107014,
      "grad_norm": 0.09733408689498901,
      "learning_rate": 1.7717337728973246e-05,
      "loss": 0.0177,
      "step": 113000
    },
    {
      "epoch": 0.9171050186249081,
      "grad_norm": 0.07251332700252533,
      "learning_rate": 1.770723745343773e-05,
      "loss": 0.0176,
      "step": 113500
    },
    {
      "epoch": 0.9211451288391147,
      "grad_norm": 0.07106909155845642,
      "learning_rate": 1.7697137177902216e-05,
      "loss": 0.0174,
      "step": 114000
    },
    {
      "epoch": 0.9251852390533214,
      "grad_norm": 0.1281566470861435,
      "learning_rate": 1.76870369023667e-05,
      "loss": 0.0176,
      "step": 114500
    },
    {
      "epoch": 0.929225349267528,
      "grad_norm": 0.09204866737127304,
      "learning_rate": 1.767693662683118e-05,
      "loss": 0.0171,
      "step": 115000
    },
    {
      "epoch": 0.9332654594817347,
      "grad_norm": 0.05850633978843689,
      "learning_rate": 1.7666836351295663e-05,
      "loss": 0.0151,
      "step": 115500
    },
    {
      "epoch": 0.9373055696959413,
      "grad_norm": 0.044992174953222275,
      "learning_rate": 1.7656736075760146e-05,
      "loss": 0.0147,
      "step": 116000
    },
    {
      "epoch": 0.941345679910148,
      "grad_norm": 0.10752815753221512,
      "learning_rate": 1.7646635800224633e-05,
      "loss": 0.0155,
      "step": 116500
    },
    {
      "epoch": 0.9453857901243546,
      "grad_norm": 0.09021549671888351,
      "learning_rate": 1.7636535524689117e-05,
      "loss": 0.0154,
      "step": 117000
    },
    {
      "epoch": 0.9494259003385612,
      "grad_norm": 0.0689893364906311,
      "learning_rate": 1.7626435249153596e-05,
      "loss": 0.0158,
      "step": 117500
    },
    {
      "epoch": 0.9534660105527679,
      "grad_norm": 0.06845594197511673,
      "learning_rate": 1.761633497361808e-05,
      "loss": 0.021,
      "step": 118000
    },
    {
      "epoch": 0.9575061207669745,
      "grad_norm": 0.11164365708827972,
      "learning_rate": 1.7606234698082563e-05,
      "loss": 0.0209,
      "step": 118500
    },
    {
      "epoch": 0.9600271495406395,
      "eval_f1_macro": 0.9436502450372893,
      "eval_f1_micro": 0.9732083745347168,
      "eval_loss": 0.0801812931895256,
      "eval_precision_macro": 0.9811968490819049,
      "eval_precision_micro": 0.9878954264431127,
      "eval_recall_macro": 0.9131012924150136,
      "eval_recall_micro": 0.9589516303534508,
      "eval_runtime": 13850.1368,
      "eval_samples_per_second": 2.859,
      "eval_steps_per_second": 0.011,
      "step": 118812
    },
    {
      "epoch": 0.9615462309811812,
      "grad_norm": 0.0935693234205246,
      "learning_rate": 1.759613442254705e-05,
      "loss": 0.0202,
      "step": 119000
    },
    {
      "epoch": 0.9655863411953878,
      "grad_norm": 0.06598909944295883,
      "learning_rate": 1.7586034147011533e-05,
      "loss": 0.02,
      "step": 119500
    },
    {
      "epoch": 0.9696264514095945,
      "grad_norm": 0.052590906620025635,
      "learning_rate": 1.7575933871476017e-05,
      "loss": 0.0203,
      "step": 120000
    },
    {
      "epoch": 0.973666561623801,
      "grad_norm": 0.1306983232498169,
      "learning_rate": 1.7565833595940497e-05,
      "loss": 0.0152,
      "step": 120500
    },
    {
      "epoch": 0.9777066718380077,
      "grad_norm": 0.07255972176790237,
      "learning_rate": 1.7555733320404983e-05,
      "loss": 0.0156,
      "step": 121000
    },
    {
      "epoch": 0.9817467820522144,
      "grad_norm": 0.0558183416724205,
      "learning_rate": 1.7545633044869467e-05,
      "loss": 0.0149,
      "step": 121500
    },
    {
      "epoch": 0.985786892266421,
      "grad_norm": 0.04536261036992073,
      "learning_rate": 1.753553276933395e-05,
      "loss": 0.0145,
      "step": 122000
    },
    {
      "epoch": 0.9898270024806277,
      "grad_norm": 0.12242696434259415,
      "learning_rate": 1.7525432493798434e-05,
      "loss": 0.0148,
      "step": 122500
    },
    {
      "epoch": 0.9938671126948343,
      "grad_norm": 0.09054296463727951,
      "learning_rate": 1.7515332218262914e-05,
      "loss": 0.0184,
      "step": 123000
    },
    {
      "epoch": 0.997907222909041,
      "grad_norm": 0.0703011155128479,
      "learning_rate": 1.75052319427274e-05,
      "loss": 0.0187,
      "step": 123500
    },
    {
      "epoch": 1.0019473331232476,
      "grad_norm": 0.06889301538467407,
      "learning_rate": 1.7495131667191884e-05,
      "loss": 0.0186,
      "step": 124000
    },
    {
      "epoch": 1.0059874433374543,
      "grad_norm": 0.1129370704293251,
      "learning_rate": 1.7485031391656367e-05,
      "loss": 0.0185,
      "step": 124500
    },
    {
      "epoch": 1.010027553551661,
      "grad_norm": 0.0729982927441597,
      "learning_rate": 1.747493111612085e-05,
      "loss": 0.0181,
      "step": 125000
    },
    {
      "epoch": 1.0140676637658674,
      "grad_norm": 0.19092483818531036,
      "learning_rate": 1.746483084058533e-05,
      "loss": 0.0167,
      "step": 125500
    },
    {
      "epoch": 1.0181077739800741,
      "grad_norm": 0.04695465415716171,
      "learning_rate": 1.7454730565049817e-05,
      "loss": 0.0168,
      "step": 126000
    },
    {
      "epoch": 1.0221478841942808,
      "grad_norm": 0.1297185719013214,
      "learning_rate": 1.74446302895143e-05,
      "loss": 0.0168,
      "step": 126500
    },
    {
      "epoch": 1.0261879944084875,
      "grad_norm": 0.07326006889343262,
      "learning_rate": 1.7434530013978784e-05,
      "loss": 0.0174,
      "step": 127000
    },
    {
      "epoch": 1.0302281046226942,
      "grad_norm": 0.0644180178642273,
      "learning_rate": 1.7424429738443267e-05,
      "loss": 0.0169,
      "step": 127500
    },
    {
      "epoch": 1.0342682148369007,
      "grad_norm": 0.04816208407282829,
      "learning_rate": 1.7414329462907747e-05,
      "loss": 0.0146,
      "step": 128000
    },
    {
      "epoch": 1.0383083250511074,
      "grad_norm": 0.09492602199316025,
      "learning_rate": 1.7404229187372234e-05,
      "loss": 0.0146,
      "step": 128500
    },
    {
      "epoch": 1.0400294120023594,
      "eval_f1_macro": 0.9447347774982361,
      "eval_f1_micro": 0.9738792089577321,
      "eval_loss": 0.09039987623691559,
      "eval_precision_macro": 0.9845539397176223,
      "eval_precision_micro": 0.9913320028997217,
      "eval_recall_macro": 0.9116629602052654,
      "eval_recall_micro": 0.9570303099541577,
      "eval_runtime": 13620.1841,
      "eval_samples_per_second": 2.908,
      "eval_steps_per_second": 0.011,
      "step": 128713
    },
    {
      "epoch": 1.042348435265314,
      "grad_norm": 0.08992265909910202,
      "learning_rate": 1.7394128911836717e-05,
      "loss": 0.014,
      "step": 129000
    },
    {
      "epoch": 1.0463885454795208,
      "grad_norm": 0.05333436280488968,
      "learning_rate": 1.73840286363012e-05,
      "loss": 0.015,
      "step": 129500
    },
    {
      "epoch": 1.0504286556937272,
      "grad_norm": 0.057117633521556854,
      "learning_rate": 1.7373928360765684e-05,
      "loss": 0.0144,
      "step": 130000
    },
    {
      "epoch": 1.054468765907934,
      "grad_norm": 0.12276995927095413,
      "learning_rate": 1.7363828085230164e-05,
      "loss": 0.0228,
      "step": 130500
    },
    {
      "epoch": 1.0585088761221406,
      "grad_norm": 0.08618568629026413,
      "learning_rate": 1.735372780969465e-05,
      "loss": 0.0229,
      "step": 131000
    },
    {
      "epoch": 1.0625489863363473,
      "grad_norm": 0.08783124387264252,
      "learning_rate": 1.7343627534159134e-05,
      "loss": 0.0222,
      "step": 131500
    },
    {
      "epoch": 1.066589096550554,
      "grad_norm": 0.06352981925010681,
      "learning_rate": 1.7333527258623618e-05,
      "loss": 0.0221,
      "step": 132000
    },
    {
      "epoch": 1.0706292067647605,
      "grad_norm": 0.10115523636341095,
      "learning_rate": 1.73234269830881e-05,
      "loss": 0.0226,
      "step": 132500
    },
    {
      "epoch": 1.0746693169789672,
      "grad_norm": 0.11306885629892349,
      "learning_rate": 1.731332670755258e-05,
      "loss": 0.0162,
      "step": 133000
    },
    {
      "epoch": 1.0787094271931739,
      "grad_norm": 0.05852317065000534,
      "learning_rate": 1.7303226432017068e-05,
      "loss": 0.0155,
      "step": 133500
    },
    {
      "epoch": 1.0827495374073806,
      "grad_norm": 0.046473681926727295,
      "learning_rate": 1.729312615648155e-05,
      "loss": 0.0149,
      "step": 134000
    },
    {
      "epoch": 1.086789647621587,
      "grad_norm": 0.11023978888988495,
      "learning_rate": 1.7283025880946034e-05,
      "loss": 0.0152,
      "step": 134500
    },
    {
      "epoch": 1.0908297578357937,
      "grad_norm": 0.07801781594753265,
      "learning_rate": 1.7272925605410518e-05,
      "loss": 0.0145,
      "step": 135000
    },
    {
      "epoch": 1.0948698680500004,
      "grad_norm": 0.057179663330316544,
      "learning_rate": 1.7262825329875e-05,
      "loss": 0.0167,
      "step": 135500
    },
    {
      "epoch": 1.098909978264207,
      "grad_norm": 0.0559101440012455,
      "learning_rate": 1.7252725054339484e-05,
      "loss": 0.0174,
      "step": 136000
    },
    {
      "epoch": 1.1029500884784136,
      "grad_norm": 0.08359610289335251,
      "learning_rate": 1.7242624778803968e-05,
      "loss": 0.0171,
      "step": 136500
    },
    {
      "epoch": 1.1069901986926203,
      "grad_norm": 0.11296004056930542,
      "learning_rate": 1.723252450326845e-05,
      "loss": 0.0172,
      "step": 137000
    },
    {
      "epoch": 1.111030308906827,
      "grad_norm": 0.061936188489198685,
      "learning_rate": 1.7222424227732935e-05,
      "loss": 0.0173,
      "step": 137500
    },
    {
      "epoch": 1.1150704191210337,
      "grad_norm": 0.07334394752979279,
      "learning_rate": 1.7212323952197418e-05,
      "loss": 0.0171,
      "step": 138000
    },
    {
      "epoch": 1.1191105293352404,
      "grad_norm": 0.10479886829853058,
      "learning_rate": 1.72022236766619e-05,
      "loss": 0.0167,
      "step": 138500
    },
    {
      "epoch": 1.1200316744640795,
      "eval_f1_macro": 0.9448181357644012,
      "eval_f1_micro": 0.9737656699358889,
      "eval_loss": 0.07901577651500702,
      "eval_precision_macro": 0.9836772488732869,
      "eval_precision_micro": 0.9897679811194957,
      "eval_recall_macro": 0.9127768967177174,
      "eval_recall_micro": 0.9582725683902614,
      "eval_runtime": 13749.2598,
      "eval_samples_per_second": 2.88,
      "eval_steps_per_second": 0.011,
      "step": 138614
    },
    {
      "epoch": 1.1231506395494468,
      "grad_norm": 0.07909916341304779,
      "learning_rate": 1.7192123401126385e-05,
      "loss": 0.0161,
      "step": 139000
    },
    {
      "epoch": 1.1271907497636535,
      "grad_norm": 0.08238150179386139,
      "learning_rate": 1.7182023125590868e-05,
      "loss": 0.017,
      "step": 139500
    },
    {
      "epoch": 1.1312308599778602,
      "grad_norm": 0.06267368793487549,
      "learning_rate": 1.717192285005535e-05,
      "loss": 0.0164,
      "step": 140000
    },
    {
      "epoch": 1.135270970192067,
      "grad_norm": 0.11608216911554337,
      "learning_rate": 1.7161822574519835e-05,
      "loss": 0.0165,
      "step": 140500
    },
    {
      "epoch": 1.1393110804062734,
      "grad_norm": 0.10431836545467377,
      "learning_rate": 1.7151722298984318e-05,
      "loss": 0.0161,
      "step": 141000
    },
    {
      "epoch": 1.14335119062048,
      "grad_norm": 0.06495651602745056,
      "learning_rate": 1.71416220234488e-05,
      "loss": 0.0165,
      "step": 141500
    },
    {
      "epoch": 1.1473913008346868,
      "grad_norm": 0.04861852526664734,
      "learning_rate": 1.7131521747913285e-05,
      "loss": 0.0163,
      "step": 142000
    },
    {
      "epoch": 1.1514314110488935,
      "grad_norm": 0.17824631929397583,
      "learning_rate": 1.7121421472377768e-05,
      "loss": 0.0159,
      "step": 142500
    },
    {
      "epoch": 1.1554715212631002,
      "grad_norm": 0.08877791464328766,
      "learning_rate": 1.711132119684225e-05,
      "loss": 0.0171,
      "step": 143000
    },
    {
      "epoch": 1.1595116314773066,
      "grad_norm": 0.06289026886224747,
      "learning_rate": 1.7101220921306735e-05,
      "loss": 0.0166,
      "step": 143500
    },
    {
      "epoch": 1.1635517416915133,
      "grad_norm": 0.0498519092798233,
      "learning_rate": 1.709112064577122e-05,
      "loss": 0.0169,
      "step": 144000
    },
    {
      "epoch": 1.16759185190572,
      "grad_norm": 0.13069184124469757,
      "learning_rate": 1.7081020370235702e-05,
      "loss": 0.0168,
      "step": 144500
    },
    {
      "epoch": 1.1716319621199267,
      "grad_norm": 0.09042539447546005,
      "learning_rate": 1.7070920094700185e-05,
      "loss": 0.0168,
      "step": 145000
    },
    {
      "epoch": 1.1756720723341332,
      "grad_norm": 0.05690092593431473,
      "learning_rate": 1.706081981916467e-05,
      "loss": 0.0166,
      "step": 145500
    },
    {
      "epoch": 1.1797121825483399,
      "grad_norm": 0.0493723563849926,
      "learning_rate": 1.7050719543629152e-05,
      "loss": 0.017,
      "step": 146000
    },
    {
      "epoch": 1.1837522927625466,
      "grad_norm": 0.10125371068716049,
      "learning_rate": 1.7040619268093635e-05,
      "loss": 0.0165,
      "step": 146500
    },
    {
      "epoch": 1.1877924029767533,
      "grad_norm": 0.0926498994231224,
      "learning_rate": 1.703051899255812e-05,
      "loss": 0.0163,
      "step": 147000
    },
    {
      "epoch": 1.19183251319096,
      "grad_norm": 0.06617089360952377,
      "learning_rate": 1.7020418717022602e-05,
      "loss": 0.0168,
      "step": 147500
    },
    {
      "epoch": 1.1958726234051664,
      "grad_norm": 0.05541488900780678,
      "learning_rate": 1.7010318441487085e-05,
      "loss": 0.0192,
      "step": 148000
    },
    {
      "epoch": 1.1999127336193731,
      "grad_norm": 0.12656770646572113,
      "learning_rate": 1.700021816595157e-05,
      "loss": 0.0193,
      "step": 148500
    },
    {
      "epoch": 1.2000339369257993,
      "eval_f1_macro": 0.9460294234862895,
      "eval_f1_micro": 0.9741125567825796,
      "eval_loss": 0.08067350834608078,
      "eval_precision_macro": 0.986194906743568,
      "eval_precision_micro": 0.9906935766072956,
      "eval_recall_macro": 0.9129872709919727,
      "eval_recall_micro": 0.9580774262702744,
      "eval_runtime": 13826.0506,
      "eval_samples_per_second": 2.864,
      "eval_steps_per_second": 0.011,
      "step": 148515
    },
    {
      "epoch": 1.2039528438335798,
      "grad_norm": 0.10946424305438995,
      "learning_rate": 1.6990117890416052e-05,
      "loss": 0.019,
      "step": 149000
    },
    {
      "epoch": 1.2079929540477865,
      "grad_norm": 0.05134887993335724,
      "learning_rate": 1.6980017614880535e-05,
      "loss": 0.0177,
      "step": 149500
    },
    {
      "epoch": 1.212033064261993,
      "grad_norm": 0.08791927248239517,
      "learning_rate": 1.696991733934502e-05,
      "loss": 0.0188,
      "step": 150000
    },
    {
      "epoch": 1.2160731744761997,
      "grad_norm": 0.11116321384906769,
      "learning_rate": 1.6959817063809502e-05,
      "loss": 0.014,
      "step": 150500
    },
    {
      "epoch": 1.2201132846904064,
      "grad_norm": 0.07135743647813797,
      "learning_rate": 1.6949716788273986e-05,
      "loss": 0.0133,
      "step": 151000
    },
    {
      "epoch": 1.224153394904613,
      "grad_norm": 0.06051028147339821,
      "learning_rate": 1.693961651273847e-05,
      "loss": 0.014,
      "step": 151500
    },
    {
      "epoch": 1.2281935051188198,
      "grad_norm": 0.05637380853295326,
      "learning_rate": 1.6929516237202952e-05,
      "loss": 0.0136,
      "step": 152000
    },
    {
      "epoch": 1.2322336153330262,
      "grad_norm": 0.2139320969581604,
      "learning_rate": 1.6919415961667436e-05,
      "loss": 0.014,
      "step": 152500
    },
    {
      "epoch": 1.236273725547233,
      "grad_norm": 0.10385521501302719,
      "learning_rate": 1.690931568613192e-05,
      "loss": 0.0136,
      "step": 153000
    },
    {
      "epoch": 1.2403138357614396,
      "grad_norm": 0.052428074181079865,
      "learning_rate": 1.6899215410596402e-05,
      "loss": 0.0143,
      "step": 153500
    },
    {
      "epoch": 1.2443539459756463,
      "grad_norm": 0.0810508131980896,
      "learning_rate": 1.6889115135060886e-05,
      "loss": 0.0136,
      "step": 154000
    },
    {
      "epoch": 1.2483940561898528,
      "grad_norm": 0.1127280592918396,
      "learning_rate": 1.687901485952537e-05,
      "loss": 0.0131,
      "step": 154500
    },
    {
      "epoch": 1.2524341664040595,
      "grad_norm": 0.0869458019733429,
      "learning_rate": 1.6868914583989852e-05,
      "loss": 0.0134,
      "step": 155000
    },
    {
      "epoch": 1.2564742766182662,
      "grad_norm": 0.055589742958545685,
      "learning_rate": 1.6858814308454336e-05,
      "loss": 0.0131,
      "step": 155500
    },
    {
      "epoch": 1.2605143868324729,
      "grad_norm": 0.07655055820941925,
      "learning_rate": 1.684871403291882e-05,
      "loss": 0.0133,
      "step": 156000
    },
    {
      "epoch": 1.2645544970466793,
      "grad_norm": 0.10124019533395767,
      "learning_rate": 1.6838613757383303e-05,
      "loss": 0.0134,
      "step": 156500
    },
    {
      "epoch": 1.268594607260886,
      "grad_norm": 0.06868778169155121,
      "learning_rate": 1.6828513481847786e-05,
      "loss": 0.0131,
      "step": 157000
    },
    {
      "epoch": 1.2726347174750927,
      "grad_norm": 0.05508118122816086,
      "learning_rate": 1.681841320631227e-05,
      "loss": 0.013,
      "step": 157500
    },
    {
      "epoch": 1.2766748276892994,
      "grad_norm": 0.061807744204998016,
      "learning_rate": 1.6808312930776753e-05,
      "loss": 0.0165,
      "step": 158000
    },
    {
      "epoch": 1.2800361993875193,
      "eval_f1_macro": 0.9468881762987015,
      "eval_f1_micro": 0.974322016191991,
      "eval_loss": 0.08317849040031433,
      "eval_precision_macro": 0.9867617789603411,
|
"eval_precision_micro": 0.9924250039485718, |
|
"eval_recall_macro": 0.9135532474499787, |
|
"eval_recall_micro": 0.9568676362293653, |
|
"eval_runtime": 13523.4341, |
|
"eval_samples_per_second": 2.928, |
|
"eval_steps_per_second": 0.011, |
|
"step": 158416 |
|
}, |
|
{ |
|
"epoch": 1.280714937903506, |
|
"grad_norm": 0.14820145070552826, |
|
"learning_rate": 1.6798212655241236e-05, |
|
"loss": 0.0154, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 1.2847550481177126, |
|
"grad_norm": 0.066920705139637, |
|
"learning_rate": 1.678811237970572e-05, |
|
"loss": 0.0165, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 1.2887951583319193, |
|
"grad_norm": 0.05135662853717804, |
|
"learning_rate": 1.6778012104170203e-05, |
|
"loss": 0.0157, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 1.292835268546126, |
|
"grad_norm": 0.0481293685734272, |
|
"learning_rate": 1.6767911828634686e-05, |
|
"loss": 0.0157, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.2968753787603327, |
|
"grad_norm": 0.11119942367076874, |
|
"learning_rate": 1.675781155309917e-05, |
|
"loss": 0.0175, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 1.3009154889745391, |
|
"grad_norm": 0.10568433254957199, |
|
"learning_rate": 1.6747711277563653e-05, |
|
"loss": 0.0195, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 1.3049555991887458, |
|
"grad_norm": 0.070424385368824, |
|
"learning_rate": 1.6737611002028136e-05, |
|
"loss": 0.0187, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 1.3089957094029525, |
|
"grad_norm": 0.055738966912031174, |
|
"learning_rate": 1.672751072649262e-05, |
|
"loss": 0.0178, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 1.3130358196171592, |
|
"grad_norm": 0.13051150739192963, |
|
"learning_rate": 1.6717410450957103e-05, |
|
"loss": 0.0184, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 1.317075929831366, |
|
"grad_norm": 0.07910241186618805, |
|
"learning_rate": 1.6707310175421586e-05, |
|
"loss": 0.0155, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 1.3211160400455724, |
|
"grad_norm": 0.15667231380939484, |
|
"learning_rate": 1.669720989988607e-05, |
|
"loss": 0.0156, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 1.325156150259779, |
|
"grad_norm": 0.1987818032503128, |
|
"learning_rate": 1.6687109624350553e-05, |
|
"loss": 0.0152, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 1.3291962604739858, |
|
"grad_norm": 0.13924378156661987, |
|
"learning_rate": 1.6677009348815036e-05, |
|
"loss": 0.0149, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 1.3332363706881925, |
|
"grad_norm": 0.07680565118789673, |
|
"learning_rate": 1.666690907327952e-05, |
|
"loss": 0.0152, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 1.337276480902399, |
|
"grad_norm": 0.10616718977689743, |
|
"learning_rate": 1.6656808797744003e-05, |
|
"loss": 0.0248, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 1.3413165911166056, |
|
"grad_norm": 0.14228446781635284, |
|
"learning_rate": 1.6646708522208487e-05, |
|
"loss": 0.0255, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 1.3453567013308123, |
|
"grad_norm": 0.12593576312065125, |
|
"learning_rate": 1.6636608246672973e-05, |
|
"loss": 0.0249, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 1.349396811545019, |
|
"grad_norm": 0.14932659268379211, |
|
"learning_rate": 1.6626507971137453e-05, |
|
"loss": 0.0253, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 1.3534369217592257, |
|
"grad_norm": 0.09529467672109604, |
|
"learning_rate": 1.6616407695601937e-05, |
|
"loss": 0.0248, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 1.3574770319734322, |
|
"grad_norm": 0.048431217670440674, |
|
"learning_rate": 1.660630742006642e-05, |
|
"loss": 0.0151, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 1.3600384618492392, |
|
"eval_f1_macro": 0.9470943315331984, |
|
"eval_f1_micro": 0.9744685617640599, |
|
"eval_loss": 0.08101344108581543, |
|
"eval_precision_macro": 0.9875637466039148, |
|
"eval_precision_micro": 0.9922827909185198, |
|
"eval_recall_macro": 0.9134025250498142, |
|
"eval_recall_micro": 0.957282681677187, |
|
"eval_runtime": 13286.0274, |
|
"eval_samples_per_second": 2.981, |
|
"eval_steps_per_second": 0.012, |
|
"step": 168317 |
|
}, |
|
{ |
|
"epoch": 1.3615171421876389, |
|
"grad_norm": 0.10621971637010574, |
|
"learning_rate": 1.6596207144530903e-05, |
|
"loss": 0.0152, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 1.3655572524018456, |
|
"grad_norm": 0.07011255621910095, |
|
"learning_rate": 1.658610686899539e-05, |
|
"loss": 0.015, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 1.3695973626160522, |
|
"grad_norm": 0.05363575369119644, |
|
"learning_rate": 1.657600659345987e-05, |
|
"loss": 0.0148, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 1.3736374728302587, |
|
"grad_norm": 0.14870333671569824, |
|
"learning_rate": 1.6565906317924354e-05, |
|
"loss": 0.0148, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 1.3776775830444654, |
|
"grad_norm": 0.11409811675548553, |
|
"learning_rate": 1.6555806042388837e-05, |
|
"loss": 0.0158, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 1.381717693258672, |
|
"grad_norm": 0.11164900660514832, |
|
"learning_rate": 1.654570576685332e-05, |
|
"loss": 0.0148, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 1.3857578034728788, |
|
"grad_norm": 0.08794820308685303, |
|
"learning_rate": 1.6535605491317807e-05, |
|
"loss": 0.0158, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 1.3897979136870853, |
|
"grad_norm": 0.060815006494522095, |
|
"learning_rate": 1.6525505215782287e-05, |
|
"loss": 0.0145, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 1.393838023901292, |
|
"grad_norm": 0.12906509637832642, |
|
"learning_rate": 1.651540494024677e-05, |
|
"loss": 0.0153, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 1.3978781341154987, |
|
"grad_norm": 0.09560517966747284, |
|
"learning_rate": 1.6505304664711254e-05, |
|
"loss": 0.026, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 1.4019182443297054, |
|
"grad_norm": 0.05908598750829697, |
|
"learning_rate": 1.6495204389175737e-05, |
|
"loss": 0.0253, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 1.405958354543912, |
|
"grad_norm": 0.06017552688717842, |
|
"learning_rate": 1.6485104113640224e-05, |
|
"loss": 0.0248, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 1.4099984647581185, |
|
"grad_norm": 0.10513614118099213, |
|
"learning_rate": 1.6475003838104704e-05, |
|
"loss": 0.0243, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 1.4140385749723252, |
|
"grad_norm": 0.08137038350105286, |
|
"learning_rate": 1.6464903562569187e-05, |
|
"loss": 0.0243, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 1.418078685186532, |
|
"grad_norm": 0.07494989782571793, |
|
"learning_rate": 1.645480328703367e-05, |
|
"loss": 0.0202, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 1.4221187954007386, |
|
"grad_norm": 0.05562291666865349, |
|
"learning_rate": 1.6444703011498154e-05, |
|
"loss": 0.0204, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.426158905614945, |
|
"grad_norm": 0.11044422537088394, |
|
"learning_rate": 1.643460273596264e-05, |
|
"loss": 0.0202, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 1.4301990158291518, |
|
"grad_norm": 0.11972752958536148, |
|
"learning_rate": 1.642450246042712e-05, |
|
"loss": 0.0195, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 1.4342391260433585, |
|
"grad_norm": 0.06898529082536697, |
|
"learning_rate": 1.6414402184891604e-05, |
|
"loss": 0.0203, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 1.4382792362575652, |
|
"grad_norm": 0.05580909922719002, |
|
"learning_rate": 1.6404301909356087e-05, |
|
"loss": 0.0124, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 1.4400407243109592, |
|
"eval_f1_macro": 0.9458974211933513, |
|
"eval_f1_micro": 0.974213850978252, |
|
"eval_loss": 0.09569641947746277, |
|
"eval_precision_macro": 0.9869461304954816, |
|
"eval_precision_micro": 0.9919091180407337, |
|
"eval_recall_macro": 0.9122157060173365, |
|
"eval_recall_micro": 0.9571388713888294, |
|
"eval_runtime": 13113.6746, |
|
"eval_samples_per_second": 3.02, |
|
"eval_steps_per_second": 0.012, |
|
"step": 178218 |
|
}, |
|
{ |
|
"epoch": 1.4423193464717718, |
|
"grad_norm": 0.09399819374084473, |
|
"learning_rate": 1.639420163382057e-05, |
|
"loss": 0.0116, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 1.4463594566859783, |
|
"grad_norm": 0.06601426005363464, |
|
"learning_rate": 1.6384101358285058e-05, |
|
"loss": 0.0117, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 1.450399566900185, |
|
"grad_norm": 0.11237422376871109, |
|
"learning_rate": 1.6374001082749538e-05, |
|
"loss": 0.0115, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 1.4544396771143917, |
|
"grad_norm": 0.04262951388955116, |
|
"learning_rate": 1.636390080721402e-05, |
|
"loss": 0.0112, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 1.4584797873285984, |
|
"grad_norm": 0.13000500202178955, |
|
"learning_rate": 1.6353800531678504e-05, |
|
"loss": 0.0132, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 1.4625198975428049, |
|
"grad_norm": 0.0949823409318924, |
|
"learning_rate": 1.6343700256142988e-05, |
|
"loss": 0.0129, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 1.4665600077570116, |
|
"grad_norm": 0.04730290174484253, |
|
"learning_rate": 1.6333599980607474e-05, |
|
"loss": 0.0129, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 1.4706001179712183, |
|
"grad_norm": 0.050584714859724045, |
|
"learning_rate": 1.6323499705071958e-05, |
|
"loss": 0.013, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 1.474640228185425, |
|
"grad_norm": 0.1683996021747589, |
|
"learning_rate": 1.6313399429536438e-05, |
|
"loss": 0.0133, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 1.4786803383996316, |
|
"grad_norm": 0.1036485880613327, |
|
"learning_rate": 1.630329915400092e-05, |
|
"loss": 0.0136, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 1.4827204486138381, |
|
"grad_norm": 0.11697889119386673, |
|
"learning_rate": 1.6293198878465404e-05, |
|
"loss": 0.0133, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 1.4867605588280448, |
|
"grad_norm": 0.0688479095697403, |
|
"learning_rate": 1.628309860292989e-05, |
|
"loss": 0.0132, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 1.4908006690422515, |
|
"grad_norm": 0.12002038955688477, |
|
"learning_rate": 1.6272998327394375e-05, |
|
"loss": 0.0131, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 1.4948407792564582, |
|
"grad_norm": 0.08021160215139389, |
|
"learning_rate": 1.6262898051858855e-05, |
|
"loss": 0.0133, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 1.4988808894706647, |
|
"grad_norm": 0.07343757152557373, |
|
"learning_rate": 1.6252797776323338e-05, |
|
"loss": 0.0135, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 1.5029209996848714, |
|
"grad_norm": 0.058117810636758804, |
|
"learning_rate": 1.624269750078782e-05, |
|
"loss": 0.0143, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 1.506961109899078, |
|
"grad_norm": 0.10462002456188202, |
|
"learning_rate": 1.6232597225252308e-05, |
|
"loss": 0.0138, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 1.5110012201132847, |
|
"grad_norm": 0.07825891673564911, |
|
"learning_rate": 1.622249694971679e-05, |
|
"loss": 0.0141, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 1.5150413303274914, |
|
"grad_norm": 0.05809338763356209, |
|
"learning_rate": 1.621239667418127e-05, |
|
"loss": 0.0136, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 1.519081440541698, |
|
"grad_norm": 0.05035299435257912, |
|
"learning_rate": 1.6202296398645755e-05, |
|
"loss": 0.0165, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 1.5200429867726792, |
|
"eval_f1_macro": 0.9465345293457039, |
|
"eval_f1_micro": 0.9742553945189574, |
|
"eval_loss": 0.08490300178527832, |
|
"eval_precision_macro": 0.9844654624463276, |
|
"eval_precision_micro": 0.9898738168824952, |
|
"eval_recall_macro": 0.9150699179615246, |
|
"eval_recall_micro": 0.9591221775256943, |
|
"eval_runtime": 13330.8537, |
|
"eval_samples_per_second": 2.971, |
|
"eval_steps_per_second": 0.012, |
|
"step": 188119 |
|
}, |
|
{ |
|
"epoch": 1.5231215507559046, |
|
"grad_norm": 0.14376361668109894, |
|
"learning_rate": 1.6192196123110238e-05, |
|
"loss": 0.0157, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 1.5271616609701113, |
|
"grad_norm": 0.07897575944662094, |
|
"learning_rate": 1.6182095847574725e-05, |
|
"loss": 0.0159, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 1.531201771184318, |
|
"grad_norm": 0.06912536919116974, |
|
"learning_rate": 1.6171995572039208e-05, |
|
"loss": 0.0163, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 1.5352418813985245, |
|
"grad_norm": 0.05066482350230217, |
|
"learning_rate": 1.6161895296503688e-05, |
|
"loss": 0.0156, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 1.5392819916127312, |
|
"grad_norm": 0.14292369782924652, |
|
"learning_rate": 1.615179502096817e-05, |
|
"loss": 0.0198, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 1.5433221018269379, |
|
"grad_norm": 0.08798356354236603, |
|
"learning_rate": 1.614169474543266e-05, |
|
"loss": 0.0197, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 1.5473622120411445, |
|
"grad_norm": 0.061990030109882355, |
|
"learning_rate": 1.6131594469897142e-05, |
|
"loss": 0.0183, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 1.551402322255351, |
|
"grad_norm": 0.05433070659637451, |
|
"learning_rate": 1.6121494194361625e-05, |
|
"loss": 0.0183, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.5554424324695577, |
|
"grad_norm": 0.13680632412433624, |
|
"learning_rate": 1.6111393918826105e-05, |
|
"loss": 0.0192, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 1.5594825426837644, |
|
"grad_norm": 0.1941196620464325, |
|
"learning_rate": 1.610129364329059e-05, |
|
"loss": 0.0176, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 1.563522652897971, |
|
"grad_norm": 0.08578658103942871, |
|
"learning_rate": 1.6091193367755075e-05, |
|
"loss": 0.0173, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 1.5675627631121778, |
|
"grad_norm": 0.04361563175916672, |
|
"learning_rate": 1.608109309221956e-05, |
|
"loss": 0.0171, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 1.5716028733263843, |
|
"grad_norm": 0.12448256462812424, |
|
"learning_rate": 1.6070992816684042e-05, |
|
"loss": 0.0168, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 1.575642983540591, |
|
"grad_norm": 0.10221997648477554, |
|
"learning_rate": 1.6060892541148522e-05, |
|
"loss": 0.017, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 1.5796830937547977, |
|
"grad_norm": 0.07009778171777725, |
|
"learning_rate": 1.6050792265613005e-05, |
|
"loss": 0.0175, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 1.5837232039690043, |
|
"grad_norm": 0.06714298576116562, |
|
"learning_rate": 1.6040691990077492e-05, |
|
"loss": 0.0174, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 1.5877633141832108, |
|
"grad_norm": 0.12766534090042114, |
|
"learning_rate": 1.6030591714541975e-05, |
|
"loss": 0.0179, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 1.5918034243974175, |
|
"grad_norm": 0.10328399389982224, |
|
"learning_rate": 1.602049143900646e-05, |
|
"loss": 0.0175, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 1.5958435346116242, |
|
"grad_norm": 0.09311484545469284, |
|
"learning_rate": 1.601039116347094e-05, |
|
"loss": 0.0172, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 1.599883644825831, |
|
"grad_norm": 0.08157425373792648, |
|
"learning_rate": 1.6000290887935422e-05, |
|
"loss": 0.0171, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 1.600045249234399, |
|
"eval_f1_macro": 0.9462988853572672, |
|
"eval_f1_micro": 0.9743141624468545, |
|
"eval_loss": 0.09824506938457489, |
|
"eval_precision_macro": 0.9878799683485701, |
|
"eval_precision_micro": 0.9928597658940401, |
|
"eval_recall_macro": 0.9118503420886092, |
|
"eval_recall_micro": 0.9564486807740753, |
|
"eval_runtime": 13226.8299, |
|
"eval_samples_per_second": 2.994, |
|
"eval_steps_per_second": 0.012, |
|
"step": 198020 |
|
}, |
|
{ |
|
"epoch": 1.6039237550400376, |
|
"grad_norm": 0.10958320647478104, |
|
"learning_rate": 1.599019061239991e-05, |
|
"loss": 0.0167, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 1.607963865254244, |
|
"grad_norm": 0.07280286401510239, |
|
"learning_rate": 1.5980090336864392e-05, |
|
"loss": 0.0164, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 1.6120039754684508, |
|
"grad_norm": 0.0816897377371788, |
|
"learning_rate": 1.5969990061328876e-05, |
|
"loss": 0.017, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 1.6160440856826574, |
|
"grad_norm": 0.046233151108026505, |
|
"learning_rate": 1.595988978579336e-05, |
|
"loss": 0.0163, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.6200841958968641, |
|
"grad_norm": 0.13440461456775665, |
|
"learning_rate": 1.594978951025784e-05, |
|
"loss": 0.015, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 1.6241243061110706, |
|
"grad_norm": 0.0861237496137619, |
|
"learning_rate": 1.5939689234722326e-05, |
|
"loss": 0.0146, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 1.6281644163252773, |
|
"grad_norm": 0.06643826514482498, |
|
"learning_rate": 1.592958895918681e-05, |
|
"loss": 0.0142, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 1.632204526539484, |
|
"grad_norm": 0.06138383969664574, |
|
"learning_rate": 1.5919488683651292e-05, |
|
"loss": 0.0143, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 1.6362446367536907, |
|
"grad_norm": 0.13212205469608307, |
|
"learning_rate": 1.5909388408115776e-05, |
|
"loss": 0.0147, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 1.6402847469678974, |
|
"grad_norm": 0.07676049321889877, |
|
"learning_rate": 1.5899288132580256e-05, |
|
"loss": 0.0151, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 1.6443248571821039, |
|
"grad_norm": 0.10008609294891357, |
|
"learning_rate": 1.5889187857044743e-05, |
|
"loss": 0.0152, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 1.6483649673963106, |
|
"grad_norm": 0.04750071465969086, |
|
"learning_rate": 1.5879087581509226e-05, |
|
"loss": 0.0157, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 1.6524050776105172, |
|
"grad_norm": 0.11740187555551529, |
|
"learning_rate": 1.586898730597371e-05, |
|
"loss": 0.0155, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 1.656445187824724, |
|
"grad_norm": 0.06920389086008072, |
|
"learning_rate": 1.5858887030438193e-05, |
|
"loss": 0.0156, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 1.6604852980389304, |
|
"grad_norm": 0.05165468528866768, |
|
"learning_rate": 1.5848786754902673e-05, |
|
"loss": 0.0151, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 1.664525408253137, |
|
"grad_norm": 0.07880023121833801, |
|
"learning_rate": 1.583868647936716e-05, |
|
"loss": 0.0152, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 1.6685655184673438, |
|
"grad_norm": 0.11061804741621017, |
|
"learning_rate": 1.5828586203831643e-05, |
|
"loss": 0.015, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 1.6726056286815505, |
|
"grad_norm": 0.08423452824354172, |
|
"learning_rate": 1.5818485928296126e-05, |
|
"loss": 0.015, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 1.6766457388957572, |
|
"grad_norm": 0.07225336134433746, |
|
"learning_rate": 1.580838565276061e-05, |
|
"loss": 0.0148, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 1.680047511696119, |
|
"eval_f1_macro": 0.9464641577937303, |
|
"eval_f1_micro": 0.9740978113062491, |
|
"eval_loss": 0.08534455299377441, |
|
"eval_precision_macro": 0.9875234035936792, |
|
"eval_precision_micro": 0.9923821627163134, |
|
"eval_recall_macro": 0.9123021051137999, |
|
"eval_recall_micro": 0.9564750381751005, |
|
"eval_runtime": 12543.2073, |
|
"eval_samples_per_second": 3.157, |
|
"eval_steps_per_second": 0.012, |
|
"step": 207921 |
|
}, |
|
{ |
|
"epoch": 1.6806858491099637, |
|
"grad_norm": 0.05870038643479347, |
|
"learning_rate": 1.579828537722509e-05, |
|
"loss": 0.0197, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.6847259593241704, |
|
"grad_norm": 0.11144687980413437, |
|
"learning_rate": 1.5788185101689576e-05, |
|
"loss": 0.0197, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 1.688766069538377, |
|
"grad_norm": 0.07475966960191727, |
|
"learning_rate": 1.577808482615406e-05, |
|
"loss": 0.0195, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 1.6928061797525837, |
|
"grad_norm": 0.05573410540819168, |
|
"learning_rate": 1.5767984550618543e-05, |
|
"loss": 0.0202, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 1.6968462899667902, |
|
"grad_norm": 0.07953529059886932, |
|
"learning_rate": 1.5757884275083026e-05, |
|
"loss": 0.0191, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 1.700886400180997, |
|
"grad_norm": 0.08590356260538101, |
|
"learning_rate": 1.5747783999547506e-05, |
|
"loss": 0.0147, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 1.7049265103952036, |
|
"grad_norm": 0.08645664155483246, |
|
"learning_rate": 1.5737683724011993e-05, |
|
"loss": 0.0145, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 1.7089666206094103, |
|
"grad_norm": 0.059178948402404785, |
|
"learning_rate": 1.5727583448476476e-05, |
|
"loss": 0.0139, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 1.7130067308236168, |
|
"grad_norm": 0.05445469170808792, |
|
"learning_rate": 1.571748317294096e-05, |
|
"loss": 0.0143, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 1.7170468410378235, |
|
"grad_norm": 0.10709578543901443, |
|
"learning_rate": 1.5707382897405443e-05, |
|
"loss": 0.0141, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 1.7210869512520302, |
|
"grad_norm": 0.0663144662976265, |
|
"learning_rate": 1.5697282621869923e-05, |
|
"loss": 0.0121, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 1.7251270614662368, |
|
"grad_norm": 0.0667869821190834, |
|
"learning_rate": 1.568718234633441e-05, |
|
"loss": 0.0125, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 1.7291671716804435, |
|
"grad_norm": 0.09561540186405182, |
|
"learning_rate": 1.5677082070798893e-05, |
|
"loss": 0.0122, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 1.73320728189465, |
|
"grad_norm": 0.09017562866210938, |
|
"learning_rate": 1.5666981795263377e-05, |
|
"loss": 0.0128, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 1.7372473921088567, |
|
"grad_norm": 0.06796102970838547, |
|
"learning_rate": 1.565688151972786e-05, |
|
"loss": 0.0129, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 1.7412875023230634, |
|
"grad_norm": 0.06975946575403214, |
|
"learning_rate": 1.5646781244192343e-05, |
|
"loss": 0.0126, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 1.74532761253727, |
|
"grad_norm": 0.04627285152673721, |
|
"learning_rate": 1.5636680968656827e-05, |
|
"loss": 0.0126, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 1.7493677227514766, |
|
"grad_norm": 0.12213249504566193, |
|
"learning_rate": 1.562658069312131e-05, |
|
"loss": 0.0124, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 1.7534078329656833, |
|
"grad_norm": 0.0799461305141449, |
|
"learning_rate": 1.5616480417585793e-05, |
|
"loss": 0.0125, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 1.75744794317989, |
|
"grad_norm": 0.05975542962551117, |
|
"learning_rate": 1.5606380142050277e-05, |
|
"loss": 0.0124, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 1.760049774157839, |
|
"eval_f1_macro": 0.9468023383313312, |
|
"eval_f1_micro": 0.9742265323429071, |
|
"eval_loss": 0.08699483424425125, |
|
"eval_precision_macro": 0.9835541832835969, |
|
"eval_precision_micro": 0.9902263876070855, |
|
"eval_recall_macro": 0.915890529699618, |
|
"eval_recall_micro": 0.9587354997620732, |
|
"eval_runtime": 12459.0163, |
|
"eval_samples_per_second": 3.179, |
|
"eval_steps_per_second": 0.012, |
|
"step": 217822 |
|
} |
|
  ],
  "logging_steps": 500,
  "max_steps": 990072,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 9901,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 5
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.468409508314638e+19,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}