{ "best_metric": 0.6971784234046936, "best_model_checkpoint": "./cardiffnlp-twitter-xlmr-finetuned-txtnly-all-42/checkpoint-16500", "epoch": 1.9841269841269842, "eval_steps": 500, "global_step": 16500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "grad_norm": 3.298647880554199, "learning_rate": 4.994023569023569e-05, "loss": 0.6122, "step": 500 }, { "epoch": 0.06, "eval_f1": 0.4840638597456899, "eval_loss": 0.854165256023407, "eval_precision": 0.6558887250350466, "eval_recall": 0.49045198529069495, "eval_runtime": 5.9285, "eval_samples_per_second": 148.099, "eval_steps_per_second": 9.277, "step": 500 }, { "epoch": 0.12, "grad_norm": 5.411099433898926, "learning_rate": 4.988011063011063e-05, "loss": 0.5497, "step": 1000 }, { "epoch": 0.12, "eval_f1": 0.6209225023342669, "eval_loss": 0.8037390112876892, "eval_precision": 0.704421745545341, "eval_recall": 0.6070083321696225, "eval_runtime": 6.1691, "eval_samples_per_second": 142.322, "eval_steps_per_second": 8.915, "step": 1000 }, { "epoch": 0.18, "grad_norm": 5.836483001708984, "learning_rate": 4.9820105820105825e-05, "loss": 0.5404, "step": 1500 }, { "epoch": 0.18, "eval_f1": 0.3652071944289921, "eval_loss": 0.9700150489807129, "eval_precision": 0.5591482310679367, "eval_recall": 0.4176288227901131, "eval_runtime": 5.8886, "eval_samples_per_second": 149.101, "eval_steps_per_second": 9.34, "step": 1500 }, { "epoch": 0.24, "grad_norm": 13.717193603515625, "learning_rate": 4.975998075998076e-05, "loss": 0.5165, "step": 2000 }, { "epoch": 0.24, "eval_f1": 0.5369027892847279, "eval_loss": 0.744874894618988, "eval_precision": 0.7349445049700448, "eval_recall": 0.529664385793418, "eval_runtime": 5.996, "eval_samples_per_second": 146.43, "eval_steps_per_second": 9.173, "step": 2000 }, { "epoch": 0.3, "grad_norm": 2.4534995555877686, "learning_rate": 4.969997594997595e-05, "loss": 0.5136, "step": 2500 }, { "epoch": 0.3, "eval_f1": 0.5001381202499963, "eval_loss": 0.7884698510169983, "eval_precision": 0.6766332095394413, "eval_recall": 0.5025275799469348, "eval_runtime": 5.9085, "eval_samples_per_second": 148.6, "eval_steps_per_second": 9.309, "step": 2500 }, { "epoch": 0.36, "grad_norm": 3.195244550704956, "learning_rate": 4.963985088985089e-05, "loss": 0.5072, "step": 3000 }, { "epoch": 0.36, "eval_f1": 0.5917137619940201, "eval_loss": 0.8123684525489807, "eval_precision": 0.6076358199852175, "eval_recall": 0.6132374435600242, "eval_runtime": 6.1108, "eval_samples_per_second": 143.68, "eval_steps_per_second": 9.0, "step": 3000 }, { "epoch": 0.42, "grad_norm": 7.579603672027588, "learning_rate": 4.957972582972583e-05, "loss": 0.5011, "step": 3500 }, { "epoch": 0.42, "eval_f1": 0.578405909718061, "eval_loss": 0.8767459392547607, "eval_precision": 0.642659899090607, "eval_recall": 0.5987143322627193, "eval_runtime": 6.1563, "eval_samples_per_second": 142.618, "eval_steps_per_second": 8.934, "step": 3500 }, { "epoch": 0.48, "grad_norm": 3.266787052154541, "learning_rate": 4.951960076960077e-05, "loss": 0.5021, "step": 4000 }, { "epoch": 0.48, "eval_f1": 0.6502990015105321, "eval_loss": 0.7957776784896851, "eval_precision": 0.6847923256926328, "eval_recall": 0.636192338127822, "eval_runtime": 6.5221, "eval_samples_per_second": 134.618, "eval_steps_per_second": 8.433, "step": 4000 }, { "epoch": 0.54, "grad_norm": 6.044332027435303, "learning_rate": 4.945959595959596e-05, "loss": 0.4946, "step": 4500 }, { "epoch": 0.54, "eval_f1": 0.4982912515017284, "eval_loss": 0.8045271039009094, "eval_precision": 0.7220405815528763, "eval_recall": 0.4968300516687614, "eval_runtime": 6.1928, "eval_samples_per_second": 141.778, "eval_steps_per_second": 8.881, "step": 4500 }, { "epoch": 0.6, "grad_norm": 5.152063846588135, "learning_rate": 4.93994708994709e-05, "loss": 0.4928, "step": 5000 }, { "epoch": 0.6, "eval_f1": 0.550273048506264, "eval_loss": 0.780342698097229, "eval_precision": 0.7581894624319455, "eval_recall": 0.5380887213145278, "eval_runtime": 6.123, "eval_samples_per_second": 143.395, "eval_steps_per_second": 8.983, "step": 5000 }, { "epoch": 0.66, "grad_norm": 4.54200553894043, "learning_rate": 4.933934583934584e-05, "loss": 0.5008, "step": 5500 }, { "epoch": 0.66, "eval_f1": 0.4594232264185665, "eval_loss": 0.7507085204124451, "eval_precision": 0.44070483572560937, "eval_recall": 0.47984452823162504, "eval_runtime": 5.932, "eval_samples_per_second": 148.011, "eval_steps_per_second": 9.272, "step": 5500 }, { "epoch": 0.72, "grad_norm": 4.075632095336914, "learning_rate": 4.927922077922078e-05, "loss": 0.4966, "step": 6000 }, { "epoch": 0.72, "eval_f1": 0.6310991936984806, "eval_loss": 0.8238988518714905, "eval_precision": 0.6139657275796522, "eval_recall": 0.6767434715821813, "eval_runtime": 5.8918, "eval_samples_per_second": 149.02, "eval_steps_per_second": 9.335, "step": 6000 }, { "epoch": 0.78, "grad_norm": 4.8725104331970215, "learning_rate": 4.921909571909572e-05, "loss": 0.4791, "step": 6500 }, { "epoch": 0.78, "eval_f1": 0.5412559573187593, "eval_loss": 0.7028306722640991, "eval_precision": 0.6567775474615866, "eval_recall": 0.520631196760229, "eval_runtime": 6.3113, "eval_samples_per_second": 139.116, "eval_steps_per_second": 8.715, "step": 6500 }, { "epoch": 0.84, "grad_norm": 1.4915893077850342, "learning_rate": 4.915897065897066e-05, "loss": 0.494, "step": 7000 }, { "epoch": 0.84, "eval_f1": 0.5227267406470947, "eval_loss": 0.8033522367477417, "eval_precision": 0.6660302960734323, "eval_recall": 0.5188623562817111, "eval_runtime": 6.1252, "eval_samples_per_second": 143.342, "eval_steps_per_second": 8.979, "step": 7000 }, { "epoch": 0.9, "grad_norm": 2.151014804840088, "learning_rate": 4.90988455988456e-05, "loss": 0.4861, "step": 7500 }, { "epoch": 0.9, "eval_f1": 0.4541201667750796, "eval_loss": 0.900325357913971, "eval_precision": 0.5780562441152168, "eval_recall": 0.4784564539403249, "eval_runtime": 6.144, "eval_samples_per_second": 142.903, "eval_steps_per_second": 8.952, "step": 7500 }, { "epoch": 0.96, "grad_norm": 4.770496368408203, "learning_rate": 4.903872053872054e-05, "loss": 0.4804, "step": 8000 }, { "epoch": 0.96, "eval_f1": 0.5791890202588422, "eval_loss": 0.773960530757904, "eval_precision": 0.6238945275403609, "eval_recall": 0.5775003491132523, "eval_runtime": 6.556, "eval_samples_per_second": 133.923, "eval_steps_per_second": 8.389, "step": 8000 }, { "epoch": 1.02, "grad_norm": 2.520460367202759, "learning_rate": 4.897859547859548e-05, "loss": 0.4614, "step": 8500 }, { "epoch": 1.02, "eval_f1": 0.6470888284841774, "eval_loss": 0.7397181391716003, "eval_precision": 0.6848151355984641, "eval_recall": 0.6312358609132803, "eval_runtime": 6.1813, "eval_samples_per_second": 142.042, "eval_steps_per_second": 8.898, "step": 8500 }, { "epoch": 1.08, "grad_norm": 4.375688552856445, "learning_rate": 4.891847041847042e-05, "loss": 0.4315, "step": 9000 }, { "epoch": 1.08, "eval_f1": 0.614857769662433, "eval_loss": 0.788919985294342, "eval_precision": 0.6641593406916259, "eval_recall": 0.6034743750872783, "eval_runtime": 6.1798, "eval_samples_per_second": 142.076, "eval_steps_per_second": 8.9, "step": 9000 }, { "epoch": 1.14, "grad_norm": 4.091088771820068, "learning_rate": 4.885834535834536e-05, "loss": 0.4506, "step": 9500 }, { "epoch": 1.14, "eval_f1": 0.4967964786589283, "eval_loss": 0.8783875703811646, "eval_precision": 0.6387377173091459, "eval_recall": 0.5016645719871526, "eval_runtime": 5.9164, "eval_samples_per_second": 148.401, "eval_steps_per_second": 9.296, "step": 9500 }, { "epoch": 1.2, "grad_norm": 3.3903276920318604, "learning_rate": 4.87982202982203e-05, "loss": 0.4489, "step": 10000 }, { "epoch": 1.2, "eval_f1": 0.4949153076705755, "eval_loss": 0.7994188070297241, "eval_precision": 0.5340329579250159, "eval_recall": 0.49638597961178615, "eval_runtime": 5.9029, "eval_samples_per_second": 148.74, "eval_steps_per_second": 9.317, "step": 10000 }, { "epoch": 1.26, "grad_norm": 3.929879903793335, "learning_rate": 4.8738095238095235e-05, "loss": 0.4466, "step": 10500 }, { "epoch": 1.26, "eval_f1": 0.44642812881455524, "eval_loss": 0.8109920024871826, "eval_precision": 0.5776119229607602, "eval_recall": 0.47351207931853095, "eval_runtime": 5.9766, "eval_samples_per_second": 146.907, "eval_steps_per_second": 9.203, "step": 10500 }, { "epoch": 1.32, "grad_norm": 6.443171501159668, "learning_rate": 4.8677970177970176e-05, "loss": 0.4319, "step": 11000 }, { "epoch": 1.32, "eval_f1": 0.5481427288492505, "eval_loss": 0.8068605661392212, "eval_precision": 0.6612496177619213, "eval_recall": 0.5399497276916632, "eval_runtime": 5.9001, "eval_samples_per_second": 148.811, "eval_steps_per_second": 9.322, "step": 11000 }, { "epoch": 1.38, "grad_norm": 7.633645057678223, "learning_rate": 4.8617845117845116e-05, "loss": 0.4243, "step": 11500 }, { "epoch": 1.38, "eval_f1": 0.5797306372413114, "eval_loss": 0.7941620349884033, "eval_precision": 0.5948358635007136, "eval_recall": 0.5704752595075175, "eval_runtime": 6.145, "eval_samples_per_second": 142.881, "eval_steps_per_second": 8.95, "step": 11500 }, { "epoch": 1.44, "grad_norm": 3.275371789932251, "learning_rate": 4.8557720057720056e-05, "loss": 0.4398, "step": 12000 }, { "epoch": 1.44, "eval_f1": 0.5247242844808815, "eval_loss": 0.9738017916679382, "eval_precision": 0.5370369073777802, "eval_recall": 0.6070139179816599, "eval_runtime": 6.219, "eval_samples_per_second": 141.18, "eval_steps_per_second": 8.844, "step": 12000 }, { "epoch": 1.5, "grad_norm": 2.4162724018096924, "learning_rate": 4.8497594997595e-05, "loss": 0.4526, "step": 12500 }, { "epoch": 1.5, "eval_f1": 0.5589742980399895, "eval_loss": 0.7195601463317871, "eval_precision": 0.7046240283838195, "eval_recall": 0.5477959316668994, "eval_runtime": 6.3918, "eval_samples_per_second": 137.363, "eval_steps_per_second": 8.605, "step": 12500 }, { "epoch": 1.56, "grad_norm": 6.926381587982178, "learning_rate": 4.8437469937469944e-05, "loss": 0.4529, "step": 13000 }, { "epoch": 1.56, "eval_f1": 0.5863097712686139, "eval_loss": 0.8049713969230652, "eval_precision": 0.6419448505612538, "eval_recall": 0.5730605595121724, "eval_runtime": 6.3636, "eval_samples_per_second": 137.971, "eval_steps_per_second": 8.643, "step": 13000 }, { "epoch": 1.62, "grad_norm": 1.8420650959014893, "learning_rate": 4.837746512746513e-05, "loss": 0.446, "step": 13500 }, { "epoch": 1.62, "eval_f1": 0.6107236144330398, "eval_loss": 0.7564206719398499, "eval_precision": 0.6520992658162544, "eval_recall": 0.5912358609132803, "eval_runtime": 6.4128, "eval_samples_per_second": 136.914, "eval_steps_per_second": 8.577, "step": 13500 }, { "epoch": 1.68, "grad_norm": 2.423569679260254, "learning_rate": 4.831746031746032e-05, "loss": 0.4315, "step": 14000 }, { "epoch": 1.68, "eval_f1": 0.621245910301715, "eval_loss": 0.751511812210083, "eval_precision": 0.6474767054531395, "eval_recall": 0.6069198901456967, "eval_runtime": 5.9833, "eval_samples_per_second": 146.741, "eval_steps_per_second": 9.192, "step": 14000 }, { "epoch": 1.74, "grad_norm": 6.773381233215332, "learning_rate": 4.825733525733526e-05, "loss": 0.4464, "step": 14500 }, { "epoch": 1.74, "eval_f1": 0.559868694735591, "eval_loss": 0.8307517170906067, "eval_precision": 0.627583612882644, "eval_recall": 0.5512991667830377, "eval_runtime": 6.1679, "eval_samples_per_second": 142.35, "eval_steps_per_second": 8.917, "step": 14500 }, { "epoch": 1.8, "grad_norm": 6.220128059387207, "learning_rate": 4.8197330447330455e-05, "loss": 0.4423, "step": 15000 }, { "epoch": 1.8, "eval_f1": 0.5991996711711277, "eval_loss": 0.798150360584259, "eval_precision": 0.6176196711770697, "eval_recall": 0.5936535865568123, "eval_runtime": 6.0738, "eval_samples_per_second": 144.556, "eval_steps_per_second": 9.055, "step": 15000 }, { "epoch": 1.86, "grad_norm": 1.1065833568572998, "learning_rate": 4.8137205387205395e-05, "loss": 0.4551, "step": 15500 }, { "epoch": 1.86, "eval_f1": 0.6019748538222912, "eval_loss": 0.822293221950531, "eval_precision": 0.6355921902599784, "eval_recall": 0.5933528836754642, "eval_runtime": 6.1197, "eval_samples_per_second": 143.472, "eval_steps_per_second": 8.987, "step": 15500 }, { "epoch": 1.92, "grad_norm": 8.631648063659668, "learning_rate": 4.807708032708033e-05, "loss": 0.4408, "step": 16000 }, { "epoch": 1.92, "eval_f1": 0.5131249172090748, "eval_loss": 0.7691208124160767, "eval_precision": 0.608759764068229, "eval_recall": 0.5147484057161477, "eval_runtime": 6.3609, "eval_samples_per_second": 138.031, "eval_steps_per_second": 8.647, "step": 16000 }, { "epoch": 1.98, "grad_norm": 6.755849361419678, "learning_rate": 4.801695526695527e-05, "loss": 0.4389, "step": 16500 }, { "epoch": 1.98, "eval_f1": 0.6702519892656928, "eval_loss": 0.6971784234046936, "eval_precision": 0.6686766810877821, "eval_recall": 0.6729106735558349, "eval_runtime": 6.1341, "eval_samples_per_second": 143.134, "eval_steps_per_second": 8.966, "step": 16500 } ], "logging_steps": 500, "max_steps": 415800, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 6.945931114601472e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }