|
{ |
|
"best_metric": 0.9745301568430197, |
|
"best_model_checkpoint": "models/pos_final_xlm_fr/checkpoint-448", |
|
"epoch": 39.94915254237288, |
|
"global_step": 560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.0025780833283944762, |
|
"eval_f1": 0.0, |
|
"eval_loss": 3.5537023544311523, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 2.1805, |
|
"eval_samples_per_second": 760.825, |
|
"eval_steps_per_second": 3.21, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_accuracy": 0.004948734664849168, |
|
"eval_f1": 0.004161179878851726, |
|
"eval_loss": 3.453566551208496, |
|
"eval_precision": 0.01528046421663443, |
|
"eval_recall": 0.0024085365853658536, |
|
"eval_runtime": 2.2293, |
|
"eval_samples_per_second": 744.195, |
|
"eval_steps_per_second": 3.14, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_accuracy": 0.2843003615243288, |
|
"eval_f1": 0.2065520889335946, |
|
"eval_loss": 3.1247434616088867, |
|
"eval_precision": 0.23953179679015324, |
|
"eval_recall": 0.18155487804878048, |
|
"eval_runtime": 2.0115, |
|
"eval_samples_per_second": 824.764, |
|
"eval_steps_per_second": 3.48, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_accuracy": 0.45430569548983585, |
|
"eval_f1": 0.3899751394208157, |
|
"eval_loss": 2.598811149597168, |
|
"eval_precision": 0.4342361215023193, |
|
"eval_recall": 0.3539024390243902, |
|
"eval_runtime": 2.3441, |
|
"eval_samples_per_second": 707.74, |
|
"eval_steps_per_second": 2.986, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_accuracy": 0.5148462039945475, |
|
"eval_f1": 0.454724342663274, |
|
"eval_loss": 2.0168328285217285, |
|
"eval_precision": 0.5125430210325048, |
|
"eval_recall": 0.4086280487804878, |
|
"eval_runtime": 2.0095, |
|
"eval_samples_per_second": 825.594, |
|
"eval_steps_per_second": 3.484, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"eval_accuracy": 0.6085758312096249, |
|
"eval_f1": 0.5542561693660192, |
|
"eval_loss": 1.4837758541107178, |
|
"eval_precision": 0.5959454245729718, |
|
"eval_recall": 0.5180182926829269, |
|
"eval_runtime": 2.0097, |
|
"eval_samples_per_second": 825.481, |
|
"eval_steps_per_second": 3.483, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_accuracy": 0.7980501392757661, |
|
"eval_f1": 0.7759372816890475, |
|
"eval_loss": 0.9300474524497986, |
|
"eval_precision": 0.7904792029100111, |
|
"eval_recall": 0.7619207317073171, |
|
"eval_runtime": 2.0866, |
|
"eval_samples_per_second": 795.072, |
|
"eval_steps_per_second": 3.355, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_accuracy": 0.9146565518876312, |
|
"eval_f1": 0.909426874150543, |
|
"eval_loss": 0.4873865842819214, |
|
"eval_precision": 0.911054676743261, |
|
"eval_recall": 0.9078048780487805, |
|
"eval_runtime": 2.0506, |
|
"eval_samples_per_second": 809.029, |
|
"eval_steps_per_second": 3.414, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"eval_accuracy": 0.9395780240621111, |
|
"eval_f1": 0.9370149572323785, |
|
"eval_loss": 0.2940390408039093, |
|
"eval_precision": 0.9372007197974808, |
|
"eval_recall": 0.936829268292683, |
|
"eval_runtime": 2.4026, |
|
"eval_samples_per_second": 690.513, |
|
"eval_steps_per_second": 2.914, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_accuracy": 0.9490309962662241, |
|
"eval_f1": 0.9476378372613843, |
|
"eval_loss": 0.20862668752670288, |
|
"eval_precision": 0.9470751240902585, |
|
"eval_recall": 0.9482012195121952, |
|
"eval_runtime": 2.1173, |
|
"eval_samples_per_second": 783.547, |
|
"eval_steps_per_second": 3.306, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"eval_accuracy": 0.96272150773425, |
|
"eval_f1": 0.9601852133914156, |
|
"eval_loss": 0.16879160702228546, |
|
"eval_precision": 0.9593961161502405, |
|
"eval_recall": 0.9609756097560975, |
|
"eval_runtime": 2.6297, |
|
"eval_samples_per_second": 630.882, |
|
"eval_steps_per_second": 2.662, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"eval_accuracy": 0.9659218870384638, |
|
"eval_f1": 0.9632472774350773, |
|
"eval_loss": 0.1449553668498993, |
|
"eval_precision": 0.9624410287627454, |
|
"eval_recall": 0.9640548780487805, |
|
"eval_runtime": 2.1659, |
|
"eval_samples_per_second": 765.976, |
|
"eval_steps_per_second": 3.232, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"eval_accuracy": 0.9685592366502697, |
|
"eval_f1": 0.9659930249912432, |
|
"eval_loss": 0.13338540494441986, |
|
"eval_precision": 0.9650670967349299, |
|
"eval_recall": 0.9669207317073171, |
|
"eval_runtime": 2.0965, |
|
"eval_samples_per_second": 791.329, |
|
"eval_steps_per_second": 3.339, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"eval_accuracy": 0.9702483257274936, |
|
"eval_f1": 0.9679444207447133, |
|
"eval_loss": 0.12125352025032043, |
|
"eval_precision": 0.9674138141064685, |
|
"eval_recall": 0.9684756097560976, |
|
"eval_runtime": 2.0848, |
|
"eval_samples_per_second": 795.757, |
|
"eval_steps_per_second": 3.358, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"eval_accuracy": 0.9718188822378948, |
|
"eval_f1": 0.9689970901445788, |
|
"eval_loss": 0.11550796031951904, |
|
"eval_precision": 0.9684216937178355, |
|
"eval_recall": 0.9695731707317073, |
|
"eval_runtime": 2.1294, |
|
"eval_samples_per_second": 779.097, |
|
"eval_steps_per_second": 3.287, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"eval_accuracy": 0.9733598056065904, |
|
"eval_f1": 0.9709382953108094, |
|
"eval_loss": 0.10925151407718658, |
|
"eval_precision": 0.9706572412322131, |
|
"eval_recall": 0.9712195121951219, |
|
"eval_runtime": 2.0792, |
|
"eval_samples_per_second": 797.884, |
|
"eval_steps_per_second": 3.367, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_accuracy": 0.9739228352989984, |
|
"eval_f1": 0.9713353956812605, |
|
"eval_loss": 0.10589804500341415, |
|
"eval_precision": 0.9710246488528685, |
|
"eval_recall": 0.9716463414634147, |
|
"eval_runtime": 3.3331, |
|
"eval_samples_per_second": 497.732, |
|
"eval_steps_per_second": 2.1, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"eval_accuracy": 0.973952468440704, |
|
"eval_f1": 0.9713650009905667, |
|
"eval_loss": 0.10459830611944199, |
|
"eval_precision": 0.9710838233949846, |
|
"eval_recall": 0.9716463414634147, |
|
"eval_runtime": 2.0698, |
|
"eval_samples_per_second": 801.518, |
|
"eval_steps_per_second": 3.382, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"eval_accuracy": 0.9745154981331121, |
|
"eval_f1": 0.9721574543958305, |
|
"eval_loss": 0.10144730657339096, |
|
"eval_precision": 0.9718760474115604, |
|
"eval_recall": 0.9724390243902439, |
|
"eval_runtime": 2.3781, |
|
"eval_samples_per_second": 697.618, |
|
"eval_steps_per_second": 2.944, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"eval_accuracy": 0.9741599004326439, |
|
"eval_f1": 0.971824761904762, |
|
"eval_loss": 0.1002810001373291, |
|
"eval_precision": 0.9714546839299315, |
|
"eval_recall": 0.9721951219512195, |
|
"eval_runtime": 2.3329, |
|
"eval_samples_per_second": 711.141, |
|
"eval_steps_per_second": 3.001, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 20.95, |
|
"eval_accuracy": 0.9750785278255201, |
|
"eval_f1": 0.9727077396644468, |
|
"eval_loss": 0.09868494421243668, |
|
"eval_precision": 0.9723669378179934, |
|
"eval_recall": 0.9730487804878049, |
|
"eval_runtime": 2.094, |
|
"eval_samples_per_second": 792.279, |
|
"eval_steps_per_second": 3.343, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 21.95, |
|
"eval_accuracy": 0.9750192615421087, |
|
"eval_f1": 0.9725075436618001, |
|
"eval_loss": 0.09709486365318298, |
|
"eval_precision": 0.9722408434395758, |
|
"eval_recall": 0.9727743902439024, |
|
"eval_runtime": 2.3309, |
|
"eval_samples_per_second": 711.747, |
|
"eval_steps_per_second": 3.003, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"eval_accuracy": 0.9753748592425769, |
|
"eval_f1": 0.9729877965172083, |
|
"eval_loss": 0.09679476916790009, |
|
"eval_precision": 0.9724396260316107, |
|
"eval_recall": 0.9735365853658536, |
|
"eval_runtime": 2.3344, |
|
"eval_samples_per_second": 710.679, |
|
"eval_steps_per_second": 2.999, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"eval_accuracy": 0.975641557517928, |
|
"eval_f1": 0.9732274825910068, |
|
"eval_loss": 0.09535854309797287, |
|
"eval_precision": 0.9728272458646846, |
|
"eval_recall": 0.9736280487804878, |
|
"eval_runtime": 3.076, |
|
"eval_samples_per_second": 539.342, |
|
"eval_steps_per_second": 2.276, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"eval_accuracy": 0.9751970603923428, |
|
"eval_f1": 0.9726641067836899, |
|
"eval_loss": 0.09671631455421448, |
|
"eval_precision": 0.9722492993785793, |
|
"eval_recall": 0.9730792682926829, |
|
"eval_runtime": 2.2902, |
|
"eval_samples_per_second": 724.376, |
|
"eval_steps_per_second": 3.056, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"eval_accuracy": 0.9762638534937474, |
|
"eval_f1": 0.9739440482720789, |
|
"eval_loss": 0.09651771187782288, |
|
"eval_precision": 0.9735286950164493, |
|
"eval_recall": 0.974359756097561, |
|
"eval_runtime": 2.3507, |
|
"eval_samples_per_second": 705.738, |
|
"eval_steps_per_second": 2.978, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 26.95, |
|
"eval_accuracy": 0.9756711906596337, |
|
"eval_f1": 0.973000975134081, |
|
"eval_loss": 0.09634628146886826, |
|
"eval_precision": 0.9725268031189084, |
|
"eval_recall": 0.9734756097560976, |
|
"eval_runtime": 2.1161, |
|
"eval_samples_per_second": 783.981, |
|
"eval_steps_per_second": 3.308, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 27.95, |
|
"eval_accuracy": 0.9759082557932792, |
|
"eval_f1": 0.9732752315943443, |
|
"eval_loss": 0.09717196971178055, |
|
"eval_precision": 0.9728009259259259, |
|
"eval_recall": 0.97375, |
|
"eval_runtime": 2.4757, |
|
"eval_samples_per_second": 670.113, |
|
"eval_steps_per_second": 2.827, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 28.95, |
|
"eval_accuracy": 0.9765601849108042, |
|
"eval_f1": 0.9740214228032484, |
|
"eval_loss": 0.09867348521947861, |
|
"eval_precision": 0.9735615729036581, |
|
"eval_recall": 0.9744817073170732, |
|
"eval_runtime": 2.2832, |
|
"eval_samples_per_second": 726.617, |
|
"eval_steps_per_second": 3.066, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 29.95, |
|
"eval_accuracy": 0.9764416523439815, |
|
"eval_f1": 0.9739541263430618, |
|
"eval_loss": 0.09944748878479004, |
|
"eval_precision": 0.9737315252171264, |
|
"eval_recall": 0.9741768292682926, |
|
"eval_runtime": 2.2486, |
|
"eval_samples_per_second": 737.785, |
|
"eval_steps_per_second": 3.113, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 30.95, |
|
"eval_accuracy": 0.9763527529188645, |
|
"eval_f1": 0.9738618871260268, |
|
"eval_loss": 0.09847575426101685, |
|
"eval_precision": 0.9736689726632737, |
|
"eval_recall": 0.9740548780487804, |
|
"eval_runtime": 2.8613, |
|
"eval_samples_per_second": 579.816, |
|
"eval_steps_per_second": 2.446, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 31.95, |
|
"eval_accuracy": 0.9768565163278611, |
|
"eval_f1": 0.9745301568430197, |
|
"eval_loss": 0.10217323899269104, |
|
"eval_precision": 0.9744261895327216, |
|
"eval_recall": 0.9746341463414634, |
|
"eval_runtime": 2.2672, |
|
"eval_samples_per_second": 731.743, |
|
"eval_steps_per_second": 3.088, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 32.95, |
|
"eval_accuracy": 0.9767379837610384, |
|
"eval_f1": 0.9742120343839542, |
|
"eval_loss": 0.10195796191692352, |
|
"eval_precision": 0.9740338900402292, |
|
"eval_recall": 0.974390243902439, |
|
"eval_runtime": 2.0866, |
|
"eval_samples_per_second": 795.073, |
|
"eval_steps_per_second": 3.355, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 33.95, |
|
"eval_accuracy": 0.9757897232264565, |
|
"eval_f1": 0.9734095238095238, |
|
"eval_loss": 0.10545694828033447, |
|
"eval_precision": 0.9730388423457731, |
|
"eval_recall": 0.973780487804878, |
|
"eval_runtime": 2.3249, |
|
"eval_samples_per_second": 713.591, |
|
"eval_steps_per_second": 3.011, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 34.95, |
|
"eval_accuracy": 0.9760267883601019, |
|
"eval_f1": 0.9736721821007404, |
|
"eval_loss": 0.10677994042634964, |
|
"eval_precision": 0.9731680575013705, |
|
"eval_recall": 0.9741768292682926, |
|
"eval_runtime": 2.2489, |
|
"eval_samples_per_second": 737.696, |
|
"eval_steps_per_second": 3.113, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 35.68, |
|
"learning_rate": 4.9800000000000004e-05, |
|
"loss": 0.6768, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 35.95, |
|
"eval_accuracy": 0.9764416523439815, |
|
"eval_f1": 0.9738759335467154, |
|
"eval_loss": 0.10853772610425949, |
|
"eval_precision": 0.9737275220969217, |
|
"eval_recall": 0.9740243902439024, |
|
"eval_runtime": 2.1342, |
|
"eval_samples_per_second": 777.355, |
|
"eval_steps_per_second": 3.28, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 36.95, |
|
"eval_accuracy": 0.9763527529188645, |
|
"eval_f1": 0.9738987337914641, |
|
"eval_loss": 0.1088031679391861, |
|
"eval_precision": 0.9734685796094916, |
|
"eval_recall": 0.974329268292683, |
|
"eval_runtime": 2.3204, |
|
"eval_samples_per_second": 714.977, |
|
"eval_steps_per_second": 3.017, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 37.95, |
|
"eval_accuracy": 0.976767616902744, |
|
"eval_f1": 0.9741534335090981, |
|
"eval_loss": 0.10998602956533432, |
|
"eval_precision": 0.9738862819184594, |
|
"eval_recall": 0.974420731707317, |
|
"eval_runtime": 2.2182, |
|
"eval_samples_per_second": 747.919, |
|
"eval_steps_per_second": 3.156, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 38.95, |
|
"eval_accuracy": 0.9767379837610384, |
|
"eval_f1": 0.9741690668861153, |
|
"eval_loss": 0.11069974303245544, |
|
"eval_precision": 0.9738870776074835, |
|
"eval_recall": 0.9744512195121952, |
|
"eval_runtime": 2.083, |
|
"eval_samples_per_second": 796.451, |
|
"eval_steps_per_second": 3.361, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 39.95, |
|
"eval_accuracy": 0.9768861494695668, |
|
"eval_f1": 0.9743683520770474, |
|
"eval_loss": 0.11153056472539902, |
|
"eval_precision": 0.9740418012308817, |
|
"eval_recall": 0.9746951219512195, |
|
"eval_runtime": 2.295, |
|
"eval_samples_per_second": 722.863, |
|
"eval_steps_per_second": 3.05, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 39.95, |
|
"step": 560, |
|
"total_flos": 4.190909533741901e+16, |
|
"train_loss": 0.6079375518219812, |
|
"train_runtime": 701.2451, |
|
"train_samples_per_second": 851.514, |
|
"train_steps_per_second": 0.799 |
|
} |
|
], |
|
"max_steps": 560, |
|
"num_train_epochs": 40, |
|
"total_flos": 4.190909533741901e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|