{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.6854329109191895, "learning_rate": 4.75e-05, "loss": 0.5593, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7243107769423559, "eval_f1": 0.6445578231292517, "eval_loss": 0.4990486800670624, "eval_precision": 0.6593400801180687, "eval_recall": 0.6374340789234406, "eval_runtime": 1.7299, "eval_samples_per_second": 230.654, "eval_steps_per_second": 28.904, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.5451161861419678, "learning_rate": 4.5e-05, "loss": 0.4939, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7619047619047619, "eval_f1": 0.7345904199079941, "eval_loss": 0.469597727060318, "eval_precision": 0.726530612244898, "eval_recall": 0.7590470994726314, "eval_runtime": 1.7342, "eval_samples_per_second": 230.072, "eval_steps_per_second": 28.831, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.6814606189727783, "learning_rate": 4.25e-05, "loss": 0.4469, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8070175438596491, "eval_f1": 0.7711286754024598, "eval_loss": 0.4036335051059723, "eval_precision": 0.7670087976539589, "eval_recall": 0.7759592653209675, "eval_runtime": 1.7345, "eval_samples_per_second": 230.031, "eval_steps_per_second": 28.826, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.360339403152466, "learning_rate": 4e-05, "loss": 0.3781, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8195488721804511, "eval_f1": 0.781223345924211, "eval_loss": 0.37482577562332153, "eval_precision": 0.7826852418860992, "eval_recall": 0.7798236042916894, "eval_runtime": 1.7327, "eval_samples_per_second": 230.273, "eval_steps_per_second": 28.856, "step": 488 }, { "epoch": 5.0, "grad_norm": 4.245613098144531, "learning_rate": 3.7500000000000003e-05, "loss": 0.3532, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8045112781954887, "eval_f1": 0.7792390194075587, "eval_loss": 0.41098225116729736, "eval_precision": 0.7687074829931972, "eval_recall": 0.8016912165848336, "eval_runtime": 1.735, "eval_samples_per_second": 229.967, "eval_steps_per_second": 28.818, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.282329559326172, "learning_rate": 3.5e-05, "loss": 0.3273, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8320802005012531, "eval_f1": 0.8035739215412536, "eval_loss": 0.3612072467803955, "eval_precision": 0.7959679725478982, "eval_recall": 0.8136933987997819, "eval_runtime": 1.7217, "eval_samples_per_second": 231.742, "eval_steps_per_second": 29.04, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.6019047498703003, "learning_rate": 3.2500000000000004e-05, "loss": 0.3098, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.809437386569873, "eval_loss": 0.35519886016845703, "eval_precision": 0.8016728624535316, "eval_recall": 0.8197399527186762, "eval_runtime": 1.7241, "eval_samples_per_second": 231.424, "eval_steps_per_second": 29.0, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.789701461791992, "learning_rate": 3e-05, "loss": 0.2953, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8255172205802521, "eval_loss": 0.3437612056732178, "eval_precision": 0.8239495798319327, "eval_recall": 0.8271503909801782, "eval_runtime": 1.7273, "eval_samples_per_second": 230.998, "eval_steps_per_second": 28.947, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.741625785827637, "learning_rate": 2.7500000000000004e-05, "loss": 0.28, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8186363636363636, "eval_loss": 0.3589649498462677, "eval_precision": 0.8101857126580836, "eval_recall": 0.8300600109110747, "eval_runtime": 1.7261, "eval_samples_per_second": 231.162, "eval_steps_per_second": 28.968, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.4551422595977783, "learning_rate": 2.5e-05, "loss": 0.2701, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8254579780661698, "eval_loss": 0.33537063002586365, "eval_precision": 0.8299216027874565, "eval_recall": 0.8214220767412257, "eval_runtime": 1.7233, "eval_samples_per_second": 231.529, "eval_steps_per_second": 29.014, "step": 1220 }, { "epoch": 11.0, "grad_norm": 4.120199680328369, "learning_rate": 2.25e-05, "loss": 0.2694, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8272399255573702, "eval_loss": 0.33660534024238586, "eval_precision": 0.8280735957109784, "eval_recall": 0.8264229859974541, "eval_runtime": 1.7322, "eval_samples_per_second": 230.349, "eval_steps_per_second": 28.866, "step": 1342 }, { "epoch": 12.0, "grad_norm": 7.375921249389648, "learning_rate": 2e-05, "loss": 0.2657, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8331541218637993, "eval_loss": 0.33782631158828735, "eval_precision": 0.8286938847649346, "eval_recall": 0.838197854155301, "eval_runtime": 1.7267, "eval_samples_per_second": 231.075, "eval_steps_per_second": 28.957, "step": 1464 }, { "epoch": 13.0, "grad_norm": 2.055612325668335, "learning_rate": 1.75e-05, "loss": 0.2603, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8359175094431583, "eval_loss": 0.3295312225818634, "eval_precision": 0.8376607470912432, "eval_recall": 0.8342425895617385, "eval_runtime": 1.7293, "eval_samples_per_second": 230.724, "eval_steps_per_second": 28.913, "step": 1586 }, { "epoch": 14.0, "grad_norm": 12.068136215209961, "learning_rate": 1.5e-05, "loss": 0.2564, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8315338681464504, "eval_loss": 0.3317546844482422, "eval_precision": 0.8299369747899159, "eval_recall": 0.8331969448990726, "eval_runtime": 1.7283, "eval_samples_per_second": 230.858, "eval_steps_per_second": 28.93, "step": 1708 }, { "epoch": 15.0, "grad_norm": 2.293285608291626, "learning_rate": 1.25e-05, "loss": 0.2583, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8315338681464504, "eval_loss": 0.32908886671066284, "eval_precision": 0.8299369747899159, "eval_recall": 0.8331969448990726, "eval_runtime": 1.7265, "eval_samples_per_second": 231.099, "eval_steps_per_second": 28.96, "step": 1830 }, { "epoch": 16.0, "grad_norm": 4.916107654571533, "learning_rate": 1e-05, "loss": 0.2438, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8297692382542047, "eval_loss": 0.33232972025871277, "eval_precision": 0.8260194117298929, "eval_recall": 0.8339243498817968, "eval_runtime": 1.7208, "eval_samples_per_second": 231.867, "eval_steps_per_second": 29.056, "step": 1952 }, { "epoch": 17.0, "grad_norm": 5.3932881355285645, "learning_rate": 7.5e-06, "loss": 0.2465, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8219298245614035, "eval_loss": 0.32245394587516785, "eval_precision": 0.8273273273273273, "eval_recall": 0.8171485724677214, "eval_runtime": 1.719, "eval_samples_per_second": 232.11, "eval_steps_per_second": 29.087, "step": 2074 }, { "epoch": 18.0, "grad_norm": 5.043487071990967, "learning_rate": 5e-06, "loss": 0.2494, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8289446964056049, "eval_loss": 0.3291957676410675, "eval_precision": 0.8266129032258065, "eval_recall": 0.8314238952536825, "eval_runtime": 1.7244, "eval_samples_per_second": 231.381, "eval_steps_per_second": 28.995, "step": 2196 }, { "epoch": 19.0, "grad_norm": 2.902482032775879, "learning_rate": 2.5e-06, "loss": 0.2382, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8281017346283209, "eval_loss": 0.32464247941970825, "eval_precision": 0.8272965800108572, "eval_recall": 0.8289234406255683, "eval_runtime": 1.7297, "eval_samples_per_second": 230.679, "eval_steps_per_second": 28.907, "step": 2318 }, { "epoch": 20.0, "grad_norm": 6.7515692710876465, "learning_rate": 0.0, "loss": 0.236, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8306964902709584, "eval_loss": 0.3235239088535309, "eval_precision": 0.8306964902709584, "eval_recall": 0.8306964902709584, "eval_runtime": 1.7296, "eval_samples_per_second": 230.692, "eval_steps_per_second": 28.909, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7609911792720000.0, "train_loss": 0.3118827554046131, "train_runtime": 624.9497, "train_samples_per_second": 116.425, "train_steps_per_second": 3.904 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7609911792720000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }