{ "best_metric": 0.6971784234046936, "best_model_checkpoint": "./cardiffnlp-twitter-xlmr-finetuned-txtnly-all-42/checkpoint-16500", "epoch": 3.787878787878788, "eval_steps": 500, "global_step": 31500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "grad_norm": 3.298647880554199, "learning_rate": 4.994023569023569e-05, "loss": 0.6122, "step": 500 }, { "epoch": 0.06, "eval_f1": 0.4840638597456899, "eval_loss": 0.854165256023407, "eval_precision": 0.6558887250350466, "eval_recall": 0.49045198529069495, "eval_runtime": 5.9285, "eval_samples_per_second": 148.099, "eval_steps_per_second": 9.277, "step": 500 }, { "epoch": 0.12, "grad_norm": 5.411099433898926, "learning_rate": 4.988011063011063e-05, "loss": 0.5497, "step": 1000 }, { "epoch": 0.12, "eval_f1": 0.6209225023342669, "eval_loss": 0.8037390112876892, "eval_precision": 0.704421745545341, "eval_recall": 0.6070083321696225, "eval_runtime": 6.1691, "eval_samples_per_second": 142.322, "eval_steps_per_second": 8.915, "step": 1000 }, { "epoch": 0.18, "grad_norm": 5.836483001708984, "learning_rate": 4.9820105820105825e-05, "loss": 0.5404, "step": 1500 }, { "epoch": 0.18, "eval_f1": 0.3652071944289921, "eval_loss": 0.9700150489807129, "eval_precision": 0.5591482310679367, "eval_recall": 0.4176288227901131, "eval_runtime": 5.8886, "eval_samples_per_second": 149.101, "eval_steps_per_second": 9.34, "step": 1500 }, { "epoch": 0.24, "grad_norm": 13.717193603515625, "learning_rate": 4.975998075998076e-05, "loss": 0.5165, "step": 2000 }, { "epoch": 0.24, "eval_f1": 0.5369027892847279, "eval_loss": 0.744874894618988, "eval_precision": 0.7349445049700448, "eval_recall": 0.529664385793418, "eval_runtime": 5.996, "eval_samples_per_second": 146.43, "eval_steps_per_second": 9.173, "step": 2000 }, { "epoch": 0.3, "grad_norm": 2.4534995555877686, "learning_rate": 4.969997594997595e-05, "loss": 0.5136, "step": 2500 }, { "epoch": 0.3, "eval_f1": 0.5001381202499963, "eval_loss": 0.7884698510169983, "eval_precision": 0.6766332095394413, "eval_recall": 0.5025275799469348, "eval_runtime": 5.9085, "eval_samples_per_second": 148.6, "eval_steps_per_second": 9.309, "step": 2500 }, { "epoch": 0.36, "grad_norm": 3.195244550704956, "learning_rate": 4.963985088985089e-05, "loss": 0.5072, "step": 3000 }, { "epoch": 0.36, "eval_f1": 0.5917137619940201, "eval_loss": 0.8123684525489807, "eval_precision": 0.6076358199852175, "eval_recall": 0.6132374435600242, "eval_runtime": 6.1108, "eval_samples_per_second": 143.68, "eval_steps_per_second": 9.0, "step": 3000 }, { "epoch": 0.42, "grad_norm": 7.579603672027588, "learning_rate": 4.957972582972583e-05, "loss": 0.5011, "step": 3500 }, { "epoch": 0.42, "eval_f1": 0.578405909718061, "eval_loss": 0.8767459392547607, "eval_precision": 0.642659899090607, "eval_recall": 0.5987143322627193, "eval_runtime": 6.1563, "eval_samples_per_second": 142.618, "eval_steps_per_second": 8.934, "step": 3500 }, { "epoch": 0.48, "grad_norm": 3.266787052154541, "learning_rate": 4.951960076960077e-05, "loss": 0.5021, "step": 4000 }, { "epoch": 0.48, "eval_f1": 0.6502990015105321, "eval_loss": 0.7957776784896851, "eval_precision": 0.6847923256926328, "eval_recall": 0.636192338127822, "eval_runtime": 6.5221, "eval_samples_per_second": 134.618, "eval_steps_per_second": 8.433, "step": 4000 }, { "epoch": 0.54, "grad_norm": 6.044332027435303, "learning_rate": 4.945959595959596e-05, "loss": 0.4946, "step": 4500 }, { "epoch": 0.54, "eval_f1": 0.4982912515017284, "eval_loss": 0.8045271039009094, "eval_precision": 0.7220405815528763, "eval_recall": 0.4968300516687614, "eval_runtime": 6.1928, "eval_samples_per_second": 141.778, "eval_steps_per_second": 8.881, "step": 4500 }, { "epoch": 0.6, "grad_norm": 5.152063846588135, "learning_rate": 4.93994708994709e-05, "loss": 0.4928, "step": 5000 }, { "epoch": 0.6, "eval_f1": 0.550273048506264, "eval_loss": 0.780342698097229, "eval_precision": 0.7581894624319455, "eval_recall": 0.5380887213145278, "eval_runtime": 6.123, "eval_samples_per_second": 143.395, "eval_steps_per_second": 8.983, "step": 5000 }, { "epoch": 0.66, "grad_norm": 4.54200553894043, "learning_rate": 4.933934583934584e-05, "loss": 0.5008, "step": 5500 }, { "epoch": 0.66, "eval_f1": 0.4594232264185665, "eval_loss": 0.7507085204124451, "eval_precision": 0.44070483572560937, "eval_recall": 0.47984452823162504, "eval_runtime": 5.932, "eval_samples_per_second": 148.011, "eval_steps_per_second": 9.272, "step": 5500 }, { "epoch": 0.72, "grad_norm": 4.075632095336914, "learning_rate": 4.927922077922078e-05, "loss": 0.4966, "step": 6000 }, { "epoch": 0.72, "eval_f1": 0.6310991936984806, "eval_loss": 0.8238988518714905, "eval_precision": 0.6139657275796522, "eval_recall": 0.6767434715821813, "eval_runtime": 5.8918, "eval_samples_per_second": 149.02, "eval_steps_per_second": 9.335, "step": 6000 }, { "epoch": 0.78, "grad_norm": 4.8725104331970215, "learning_rate": 4.921909571909572e-05, "loss": 0.4791, "step": 6500 }, { "epoch": 0.78, "eval_f1": 0.5412559573187593, "eval_loss": 0.7028306722640991, "eval_precision": 0.6567775474615866, "eval_recall": 0.520631196760229, "eval_runtime": 6.3113, "eval_samples_per_second": 139.116, "eval_steps_per_second": 8.715, "step": 6500 }, { "epoch": 0.84, "grad_norm": 1.4915893077850342, "learning_rate": 4.915897065897066e-05, "loss": 0.494, "step": 7000 }, { "epoch": 0.84, "eval_f1": 0.5227267406470947, "eval_loss": 0.8033522367477417, "eval_precision": 0.6660302960734323, "eval_recall": 0.5188623562817111, "eval_runtime": 6.1252, "eval_samples_per_second": 143.342, "eval_steps_per_second": 8.979, "step": 7000 }, { "epoch": 0.9, "grad_norm": 2.151014804840088, "learning_rate": 4.90988455988456e-05, "loss": 0.4861, "step": 7500 }, { "epoch": 0.9, "eval_f1": 0.4541201667750796, "eval_loss": 0.900325357913971, "eval_precision": 0.5780562441152168, "eval_recall": 0.4784564539403249, "eval_runtime": 6.144, "eval_samples_per_second": 142.903, "eval_steps_per_second": 8.952, "step": 7500 }, { "epoch": 0.96, "grad_norm": 4.770496368408203, "learning_rate": 4.903872053872054e-05, "loss": 0.4804, "step": 8000 }, { "epoch": 0.96, "eval_f1": 0.5791890202588422, "eval_loss": 0.773960530757904, "eval_precision": 0.6238945275403609, "eval_recall": 0.5775003491132523, "eval_runtime": 6.556, "eval_samples_per_second": 133.923, "eval_steps_per_second": 8.389, "step": 8000 }, { "epoch": 1.02, "grad_norm": 2.520460367202759, "learning_rate": 4.897859547859548e-05, "loss": 0.4614, "step": 8500 }, { "epoch": 1.02, "eval_f1": 0.6470888284841774, "eval_loss": 0.7397181391716003, "eval_precision": 0.6848151355984641, "eval_recall": 0.6312358609132803, "eval_runtime": 6.1813, "eval_samples_per_second": 142.042, "eval_steps_per_second": 8.898, "step": 8500 }, { "epoch": 1.08, "grad_norm": 4.375688552856445, "learning_rate": 4.891847041847042e-05, "loss": 0.4315, "step": 9000 }, { "epoch": 1.08, "eval_f1": 0.614857769662433, "eval_loss": 0.788919985294342, "eval_precision": 0.6641593406916259, "eval_recall": 0.6034743750872783, "eval_runtime": 6.1798, "eval_samples_per_second": 142.076, "eval_steps_per_second": 8.9, "step": 9000 }, { "epoch": 1.14, "grad_norm": 4.091088771820068, "learning_rate": 4.885834535834536e-05, "loss": 0.4506, "step": 9500 }, { "epoch": 1.14, "eval_f1": 0.4967964786589283, "eval_loss": 0.8783875703811646, "eval_precision": 0.6387377173091459, "eval_recall": 0.5016645719871526, "eval_runtime": 5.9164, "eval_samples_per_second": 148.401, "eval_steps_per_second": 9.296, "step": 9500 }, { "epoch": 1.2, "grad_norm": 3.3903276920318604, "learning_rate": 4.87982202982203e-05, "loss": 0.4489, "step": 10000 }, { "epoch": 1.2, "eval_f1": 0.4949153076705755, "eval_loss": 0.7994188070297241, "eval_precision": 0.5340329579250159, "eval_recall": 0.49638597961178615, "eval_runtime": 5.9029, "eval_samples_per_second": 148.74, "eval_steps_per_second": 9.317, "step": 10000 }, { "epoch": 1.26, "grad_norm": 3.929879903793335, "learning_rate": 4.8738095238095235e-05, "loss": 0.4466, "step": 10500 }, { "epoch": 1.26, "eval_f1": 0.44642812881455524, "eval_loss": 0.8109920024871826, "eval_precision": 0.5776119229607602, "eval_recall": 0.47351207931853095, "eval_runtime": 5.9766, "eval_samples_per_second": 146.907, "eval_steps_per_second": 9.203, "step": 10500 }, { "epoch": 1.32, "grad_norm": 6.443171501159668, "learning_rate": 4.8677970177970176e-05, "loss": 0.4319, "step": 11000 }, { "epoch": 1.32, "eval_f1": 0.5481427288492505, "eval_loss": 0.8068605661392212, "eval_precision": 0.6612496177619213, "eval_recall": 0.5399497276916632, "eval_runtime": 5.9001, "eval_samples_per_second": 148.811, "eval_steps_per_second": 9.322, "step": 11000 }, { "epoch": 1.38, "grad_norm": 7.633645057678223, "learning_rate": 4.8617845117845116e-05, "loss": 0.4243, "step": 11500 }, { "epoch": 1.38, "eval_f1": 0.5797306372413114, "eval_loss": 0.7941620349884033, "eval_precision": 0.5948358635007136, "eval_recall": 0.5704752595075175, "eval_runtime": 6.145, "eval_samples_per_second": 142.881, "eval_steps_per_second": 8.95, "step": 11500 }, { "epoch": 1.44, "grad_norm": 3.275371789932251, "learning_rate": 4.8557720057720056e-05, "loss": 0.4398, "step": 12000 }, { "epoch": 1.44, "eval_f1": 0.5247242844808815, "eval_loss": 0.9738017916679382, "eval_precision": 0.5370369073777802, "eval_recall": 0.6070139179816599, "eval_runtime": 6.219, "eval_samples_per_second": 141.18, "eval_steps_per_second": 8.844, "step": 12000 }, { "epoch": 1.5, "grad_norm": 2.4162724018096924, "learning_rate": 4.8497594997595e-05, "loss": 0.4526, "step": 12500 }, { "epoch": 1.5, "eval_f1": 0.5589742980399895, "eval_loss": 0.7195601463317871, "eval_precision": 0.7046240283838195, "eval_recall": 0.5477959316668994, "eval_runtime": 6.3918, "eval_samples_per_second": 137.363, "eval_steps_per_second": 8.605, "step": 12500 }, { "epoch": 1.56, "grad_norm": 6.926381587982178, "learning_rate": 4.8437469937469944e-05, "loss": 0.4529, "step": 13000 }, { "epoch": 1.56, "eval_f1": 0.5863097712686139, "eval_loss": 0.8049713969230652, "eval_precision": 0.6419448505612538, "eval_recall": 0.5730605595121724, "eval_runtime": 6.3636, "eval_samples_per_second": 137.971, "eval_steps_per_second": 8.643, "step": 13000 }, { "epoch": 1.62, "grad_norm": 1.8420650959014893, "learning_rate": 4.837746512746513e-05, "loss": 0.446, "step": 13500 }, { "epoch": 1.62, "eval_f1": 0.6107236144330398, "eval_loss": 0.7564206719398499, "eval_precision": 0.6520992658162544, "eval_recall": 0.5912358609132803, "eval_runtime": 6.4128, "eval_samples_per_second": 136.914, "eval_steps_per_second": 8.577, "step": 13500 }, { "epoch": 1.68, "grad_norm": 2.423569679260254, "learning_rate": 4.831746031746032e-05, "loss": 0.4315, "step": 14000 }, { "epoch": 1.68, "eval_f1": 0.621245910301715, "eval_loss": 0.751511812210083, "eval_precision": 0.6474767054531395, "eval_recall": 0.6069198901456967, "eval_runtime": 5.9833, "eval_samples_per_second": 146.741, "eval_steps_per_second": 9.192, "step": 14000 }, { "epoch": 1.74, "grad_norm": 6.773381233215332, "learning_rate": 4.825733525733526e-05, "loss": 0.4464, "step": 14500 }, { "epoch": 1.74, "eval_f1": 0.559868694735591, "eval_loss": 0.8307517170906067, "eval_precision": 0.627583612882644, "eval_recall": 0.5512991667830377, "eval_runtime": 6.1679, "eval_samples_per_second": 142.35, "eval_steps_per_second": 8.917, "step": 14500 }, { "epoch": 1.8, "grad_norm": 6.220128059387207, "learning_rate": 4.8197330447330455e-05, "loss": 0.4423, "step": 15000 }, { "epoch": 1.8, "eval_f1": 0.5991996711711277, "eval_loss": 0.798150360584259, "eval_precision": 0.6176196711770697, "eval_recall": 0.5936535865568123, "eval_runtime": 6.0738, "eval_samples_per_second": 144.556, "eval_steps_per_second": 9.055, "step": 15000 }, { "epoch": 1.86, "grad_norm": 1.1065833568572998, "learning_rate": 4.8137205387205395e-05, "loss": 0.4551, "step": 15500 }, { "epoch": 1.86, "eval_f1": 0.6019748538222912, "eval_loss": 0.822293221950531, "eval_precision": 0.6355921902599784, "eval_recall": 0.5933528836754642, "eval_runtime": 6.1197, "eval_samples_per_second": 143.472, "eval_steps_per_second": 8.987, "step": 15500 }, { "epoch": 1.92, "grad_norm": 8.631648063659668, "learning_rate": 4.807708032708033e-05, "loss": 0.4408, "step": 16000 }, { "epoch": 1.92, "eval_f1": 0.5131249172090748, "eval_loss": 0.7691208124160767, "eval_precision": 0.608759764068229, "eval_recall": 0.5147484057161477, "eval_runtime": 6.3609, "eval_samples_per_second": 138.031, "eval_steps_per_second": 8.647, "step": 16000 }, { "epoch": 1.98, "grad_norm": 6.755849361419678, "learning_rate": 4.801695526695527e-05, "loss": 0.4389, "step": 16500 }, { "epoch": 1.98, "eval_f1": 0.6702519892656928, "eval_loss": 0.6971784234046936, "eval_precision": 0.6686766810877821, "eval_recall": 0.6729106735558349, "eval_runtime": 6.1341, "eval_samples_per_second": 143.134, "eval_steps_per_second": 8.966, "step": 16500 }, { "epoch": 2.04, "grad_norm": 19.813188552856445, "learning_rate": 4.795683020683021e-05, "loss": 0.3886, "step": 17000 }, { "epoch": 2.04, "eval_f1": 0.5543489692487942, "eval_loss": 0.7798230648040771, "eval_precision": 0.6125764375980934, "eval_recall": 0.543671740445934, "eval_runtime": 6.7491, "eval_samples_per_second": 130.09, "eval_steps_per_second": 8.149, "step": 17000 }, { "epoch": 2.1, "grad_norm": 7.927220821380615, "learning_rate": 4.789670514670515e-05, "loss": 0.3883, "step": 17500 }, { "epoch": 2.1, "eval_f1": 0.5978449313058904, "eval_loss": 0.8385018110275269, "eval_precision": 0.5948463716988197, "eval_recall": 0.6225499231950845, "eval_runtime": 6.122, "eval_samples_per_second": 143.416, "eval_steps_per_second": 8.984, "step": 17500 }, { "epoch": 2.16, "grad_norm": 6.237366199493408, "learning_rate": 4.783658008658009e-05, "loss": 0.4011, "step": 18000 }, { "epoch": 2.16, "eval_f1": 0.5914931472808443, "eval_loss": 0.7754688858985901, "eval_precision": 0.655128213311837, "eval_recall": 0.578716194200065, "eval_runtime": 6.558, "eval_samples_per_second": 133.882, "eval_steps_per_second": 8.387, "step": 18000 }, { "epoch": 2.22, "grad_norm": 3.3301048278808594, "learning_rate": 4.777645502645503e-05, "loss": 0.3992, "step": 18500 }, { "epoch": 2.22, "eval_f1": 0.5472455226037474, "eval_loss": 0.788632333278656, "eval_precision": 0.558195855728615, "eval_recall": 0.5519042964204254, "eval_runtime": 6.124, "eval_samples_per_second": 143.371, "eval_steps_per_second": 8.981, "step": 18500 }, { "epoch": 2.28, "grad_norm": 8.471348762512207, "learning_rate": 4.771645021645022e-05, "loss": 0.393, "step": 19000 }, { "epoch": 2.28, "eval_f1": 0.5889012942356766, "eval_loss": 0.7660124897956848, "eval_precision": 0.5901145289176211, "eval_recall": 0.592326956197924, "eval_runtime": 5.8572, "eval_samples_per_second": 149.9, "eval_steps_per_second": 9.39, "step": 19000 }, { "epoch": 2.34, "grad_norm": 15.840304374694824, "learning_rate": 4.765632515632516e-05, "loss": 0.3891, "step": 19500 }, { "epoch": 2.34, "eval_f1": 0.5354251462409856, "eval_loss": 0.7701670527458191, "eval_precision": 0.579215207029406, "eval_recall": 0.5330605595121725, "eval_runtime": 6.1187, "eval_samples_per_second": 143.495, "eval_steps_per_second": 8.989, "step": 19500 }, { "epoch": 2.41, "grad_norm": 1.6515294313430786, "learning_rate": 4.75962000962001e-05, "loss": 0.4119, "step": 20000 }, { "epoch": 2.41, "eval_f1": 0.5110658029804255, "eval_loss": 0.8545361161231995, "eval_precision": 0.5405823804957771, "eval_recall": 0.5243262114229856, "eval_runtime": 6.2418, "eval_samples_per_second": 140.665, "eval_steps_per_second": 8.812, "step": 20000 }, { "epoch": 2.47, "grad_norm": 3.166147470474243, "learning_rate": 4.753607503607504e-05, "loss": 0.3981, "step": 20500 }, { "epoch": 2.47, "eval_f1": 0.53639943040752, "eval_loss": 0.864085853099823, "eval_precision": 0.5695344700259635, "eval_recall": 0.5536247265279522, "eval_runtime": 5.9635, "eval_samples_per_second": 147.229, "eval_steps_per_second": 9.223, "step": 20500 }, { "epoch": 2.53, "grad_norm": 4.143538475036621, "learning_rate": 4.747594997594998e-05, "loss": 0.4, "step": 21000 }, { "epoch": 2.53, "eval_f1": 0.582186065915728, "eval_loss": 0.8044998049736023, "eval_precision": 0.5987904356270873, "eval_recall": 0.5844826141600334, "eval_runtime": 5.9156, "eval_samples_per_second": 148.422, "eval_steps_per_second": 9.298, "step": 21000 }, { "epoch": 2.59, "grad_norm": 5.849362850189209, "learning_rate": 4.741582491582492e-05, "loss": 0.4059, "step": 21500 }, { "epoch": 2.59, "eval_f1": 0.569600279809319, "eval_loss": 0.8023470044136047, "eval_precision": 0.6300909361955873, "eval_recall": 0.5548880510170833, "eval_runtime": 5.9073, "eval_samples_per_second": 148.629, "eval_steps_per_second": 9.31, "step": 21500 }, { "epoch": 2.65, "grad_norm": 2.0296847820281982, "learning_rate": 4.735582010582011e-05, "loss": 0.3805, "step": 22000 }, { "epoch": 2.65, "eval_f1": 0.5387095557628462, "eval_loss": 0.8242425322532654, "eval_precision": 0.5632921859195318, "eval_recall": 0.536337569240795, "eval_runtime": 6.1681, "eval_samples_per_second": 142.345, "eval_steps_per_second": 8.917, "step": 22000 }, { "epoch": 2.71, "grad_norm": 5.022754192352295, "learning_rate": 4.729569504569505e-05, "loss": 0.4126, "step": 22500 }, { "epoch": 2.71, "eval_f1": 0.525337187977395, "eval_loss": 0.8866151571273804, "eval_precision": 0.563019122327633, "eval_recall": 0.5244211702276219, "eval_runtime": 6.5791, "eval_samples_per_second": 133.453, "eval_steps_per_second": 8.36, "step": 22500 }, { "epoch": 2.77, "grad_norm": 6.320919990539551, "learning_rate": 4.7235690235690236e-05, "loss": 0.3959, "step": 23000 }, { "epoch": 2.77, "eval_f1": 0.5715827904573106, "eval_loss": 0.922848641872406, "eval_precision": 0.6485667793604627, "eval_recall": 0.5569566634082763, "eval_runtime": 6.5486, "eval_samples_per_second": 134.075, "eval_steps_per_second": 8.399, "step": 23000 }, { "epoch": 2.83, "grad_norm": 3.2674639225006104, "learning_rate": 4.717556517556518e-05, "loss": 0.3972, "step": 23500 }, { "epoch": 2.83, "eval_f1": 0.6330230633421515, "eval_loss": 0.8297170400619507, "eval_precision": 0.64149542011954, "eval_recall": 0.633559558720849, "eval_runtime": 6.1502, "eval_samples_per_second": 142.759, "eval_steps_per_second": 8.943, "step": 23500 }, { "epoch": 2.89, "grad_norm": 5.248292922973633, "learning_rate": 4.711544011544012e-05, "loss": 0.3779, "step": 24000 }, { "epoch": 2.89, "eval_f1": 0.5897470753706388, "eval_loss": 0.8682935833930969, "eval_precision": 0.6023327508623889, "eval_recall": 0.5919508448540706, "eval_runtime": 6.3839, "eval_samples_per_second": 137.534, "eval_steps_per_second": 8.615, "step": 24000 }, { "epoch": 2.95, "grad_norm": 4.1834635734558105, "learning_rate": 4.705531505531506e-05, "loss": 0.3951, "step": 24500 }, { "epoch": 2.95, "eval_f1": 0.5124969418380673, "eval_loss": 0.8628427982330322, "eval_precision": 0.5891878367677518, "eval_recall": 0.5116492110040497, "eval_runtime": 6.1272, "eval_samples_per_second": 143.295, "eval_steps_per_second": 8.976, "step": 24500 }, { "epoch": 3.01, "grad_norm": 12.86809253692627, "learning_rate": 4.699518999519e-05, "loss": 0.3916, "step": 25000 }, { "epoch": 3.01, "eval_f1": 0.5024144172335627, "eval_loss": 0.9203388094902039, "eval_precision": 0.6304846593419121, "eval_recall": 0.5026001955034213, "eval_runtime": 6.0613, "eval_samples_per_second": 144.854, "eval_steps_per_second": 9.074, "step": 25000 }, { "epoch": 3.07, "grad_norm": 3.2101404666900635, "learning_rate": 4.693506493506494e-05, "loss": 0.3524, "step": 25500 }, { "epoch": 3.07, "eval_f1": 0.5010573535401949, "eval_loss": 0.9825400710105896, "eval_precision": 0.6088672873311428, "eval_recall": 0.5039249639249639, "eval_runtime": 5.9279, "eval_samples_per_second": 148.113, "eval_steps_per_second": 9.278, "step": 25500 }, { "epoch": 3.13, "grad_norm": 16.025983810424805, "learning_rate": 4.687493987493988e-05, "loss": 0.3332, "step": 26000 }, { "epoch": 3.13, "eval_f1": 0.5814110917677252, "eval_loss": 0.8755331635475159, "eval_precision": 0.5979503457905185, "eval_recall": 0.5711502117953731, "eval_runtime": 6.5321, "eval_samples_per_second": 134.413, "eval_steps_per_second": 8.42, "step": 26000 }, { "epoch": 3.19, "grad_norm": 12.575716972351074, "learning_rate": 4.681481481481482e-05, "loss": 0.3517, "step": 26500 }, { "epoch": 3.19, "eval_f1": 0.6181463909269773, "eval_loss": 0.9921577572822571, "eval_precision": 0.6701390442386371, "eval_recall": 0.5940511101801424, "eval_runtime": 6.2002, "eval_samples_per_second": 141.609, "eval_steps_per_second": 8.871, "step": 26500 }, { "epoch": 3.25, "grad_norm": 2.219468355178833, "learning_rate": 4.675468975468976e-05, "loss": 0.3534, "step": 27000 }, { "epoch": 3.25, "eval_f1": 0.5242620258087817, "eval_loss": 0.9572548866271973, "eval_precision": 0.5652503976549385, "eval_recall": 0.5174640413350091, "eval_runtime": 6.4041, "eval_samples_per_second": 137.101, "eval_steps_per_second": 8.588, "step": 27000 }, { "epoch": 3.31, "grad_norm": 2.1716973781585693, "learning_rate": 4.6694684944684945e-05, "loss": 0.3544, "step": 27500 }, { "epoch": 3.31, "eval_f1": 0.5551290620723939, "eval_loss": 0.9826774001121521, "eval_precision": 0.5738657811880764, "eval_recall": 0.5531322440999861, "eval_runtime": 5.8897, "eval_samples_per_second": 149.075, "eval_steps_per_second": 9.338, "step": 27500 }, { "epoch": 3.37, "grad_norm": 5.642761707305908, "learning_rate": 4.6634559884559885e-05, "loss": 0.3526, "step": 28000 }, { "epoch": 3.37, "eval_f1": 0.46574966897620484, "eval_loss": 0.9517427682876587, "eval_precision": 0.6019158514451703, "eval_recall": 0.4737364427687008, "eval_runtime": 6.2232, "eval_samples_per_second": 141.086, "eval_steps_per_second": 8.838, "step": 28000 }, { "epoch": 3.43, "grad_norm": 8.693815231323242, "learning_rate": 4.6574434824434825e-05, "loss": 0.3448, "step": 28500 }, { "epoch": 3.43, "eval_f1": 0.5231658522131929, "eval_loss": 0.955856204032898, "eval_precision": 0.5743577178625582, "eval_recall": 0.5138062654191686, "eval_runtime": 6.2254, "eval_samples_per_second": 141.036, "eval_steps_per_second": 8.835, "step": 28500 }, { "epoch": 3.49, "grad_norm": 10.058433532714844, "learning_rate": 4.6514309764309766e-05, "loss": 0.3662, "step": 29000 }, { "epoch": 3.49, "eval_f1": 0.6173176500366803, "eval_loss": 0.8469758033752441, "eval_precision": 0.6416565078769693, "eval_recall": 0.6176418563515337, "eval_runtime": 6.1339, "eval_samples_per_second": 143.14, "eval_steps_per_second": 8.967, "step": 29000 }, { "epoch": 3.55, "grad_norm": 9.207432746887207, "learning_rate": 4.645466570466571e-05, "loss": 0.3502, "step": 29500 }, { "epoch": 3.55, "eval_f1": 0.5911826792863208, "eval_loss": 0.8524171113967896, "eval_precision": 0.6606129937002267, "eval_recall": 0.577619513103384, "eval_runtime": 5.9367, "eval_samples_per_second": 147.893, "eval_steps_per_second": 9.264, "step": 29500 }, { "epoch": 3.61, "grad_norm": 2.538233757019043, "learning_rate": 4.639454064454065e-05, "loss": 0.3733, "step": 30000 }, { "epoch": 3.61, "eval_f1": 0.5466184654496565, "eval_loss": 0.9210164546966553, "eval_precision": 0.5577658998711631, "eval_recall": 0.5554857329050877, "eval_runtime": 6.4254, "eval_samples_per_second": 136.645, "eval_steps_per_second": 8.56, "step": 30000 }, { "epoch": 3.67, "grad_norm": 2.017235279083252, "learning_rate": 4.633441558441559e-05, "loss": 0.3424, "step": 30500 }, { "epoch": 3.67, "eval_f1": 0.5809192439862544, "eval_loss": 0.9294881820678711, "eval_precision": 0.5863171312403235, "eval_recall": 0.6100302564818694, "eval_runtime": 6.2949, "eval_samples_per_second": 139.477, "eval_steps_per_second": 8.737, "step": 30500 }, { "epoch": 3.73, "grad_norm": 7.538774490356445, "learning_rate": 4.627429052429053e-05, "loss": 0.3591, "step": 31000 }, { "epoch": 3.73, "eval_f1": 0.4588251776601326, "eval_loss": 0.970705509185791, "eval_precision": 0.5827537007312288, "eval_recall": 0.4768803239770982, "eval_runtime": 6.0168, "eval_samples_per_second": 145.925, "eval_steps_per_second": 9.141, "step": 31000 }, { "epoch": 3.79, "grad_norm": 4.64936637878418, "learning_rate": 4.621416546416546e-05, "loss": 0.3634, "step": 31500 }, { "epoch": 3.79, "eval_f1": 0.575160103511553, "eval_loss": 0.8524229526519775, "eval_precision": 0.6136046998053873, "eval_recall": 0.5680603267700042, "eval_runtime": 6.5694, "eval_samples_per_second": 133.651, "eval_steps_per_second": 8.372, "step": 31500 }, { "epoch": 3.79, "step": 31500, "total_flos": 1.3260126913238016e+17, "train_loss": 0.42908321610708083, "train_runtime": 7590.5884, "train_samples_per_second": 876.388, "train_steps_per_second": 54.778 } ], "logging_steps": 500, "max_steps": 415800, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1.3260126913238016e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }