{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 8.300418853759766, "learning_rate": 4.75e-05, "loss": 0.4092, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.7892045810686176, "eval_loss": 0.3456897437572479, "eval_precision": 0.8929804104477612, "eval_recall": 0.7553646117475905, "eval_runtime": 1.6637, "eval_samples_per_second": 239.829, "eval_steps_per_second": 30.054, "step": 122 }, { "epoch": 2.0, "grad_norm": 9.286004066467285, "learning_rate": 4.5e-05, "loss": 0.2282, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8676331036823873, "eval_loss": 0.258427232503891, "eval_precision": 0.8749292230261088, "eval_recall": 0.8612474995453718, "eval_runtime": 1.6592, "eval_samples_per_second": 240.478, "eval_steps_per_second": 30.135, "step": 244 }, { "epoch": 3.0, "grad_norm": 13.956122398376465, "learning_rate": 4.25e-05, "loss": 0.138, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8429546050905274, "eval_loss": 0.44172462821006775, "eval_precision": 0.882529902138456, "eval_recall": 0.8198763411529368, "eval_runtime": 1.6567, "eval_samples_per_second": 240.84, "eval_steps_per_second": 30.18, "step": 366 }, { "epoch": 4.0, "grad_norm": 0.09012622386217117, "learning_rate": 4e-05, "loss": 0.0837, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8793019197207679, "eval_loss": 0.4037090241909027, "eval_precision": 0.8893184421534936, "eval_recall": 0.8708401527550463, "eval_runtime": 1.6737, "eval_samples_per_second": 238.388, "eval_steps_per_second": 29.873, "step": 488 }, { "epoch": 5.0, "grad_norm": 5.347772121429443, "learning_rate": 3.7500000000000003e-05, "loss": 0.0426, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8873149414352814, "eval_loss": 0.5462044477462769, "eval_precision": 0.8806277372262774, "eval_recall": 0.8951172940534643, "eval_runtime": 1.6751, "eval_samples_per_second": 238.188, "eval_steps_per_second": 29.848, "step": 610 }, { "epoch": 6.0, "grad_norm": 0.011684279888868332, "learning_rate": 3.5e-05, "loss": 0.0502, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8707140332272888, "eval_loss": 0.5626452565193176, "eval_precision": 0.8618432385874246, "eval_recall": 0.8819785415530097, "eval_runtime": 1.6815, "eval_samples_per_second": 237.294, "eval_steps_per_second": 29.736, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.015737071633338928, "learning_rate": 3.2500000000000004e-05, "loss": 0.0242, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8848664457009163, "eval_loss": 0.6241247653961182, "eval_precision": 0.8977236138837015, "eval_recall": 0.8743862520458265, "eval_runtime": 1.6765, "eval_samples_per_second": 238.002, "eval_steps_per_second": 29.825, "step": 854 }, { "epoch": 8.0, "grad_norm": 0.004997015465050936, "learning_rate": 3e-05, "loss": 0.0217, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8692251105268142, "eval_loss": 0.7096332907676697, "eval_precision": 0.8579132638693325, "eval_recall": 0.885206401163848, "eval_runtime": 1.6742, "eval_samples_per_second": 238.324, "eval_steps_per_second": 29.865, "step": 976 }, { "epoch": 9.0, "grad_norm": 0.004219838418066502, "learning_rate": 2.7500000000000004e-05, "loss": 0.0229, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.9122807017543859, "eval_f1": 0.8954723392788977, "eval_loss": 0.611499547958374, "eval_precision": 0.8909569746108776, "eval_recall": 0.9004364429896345, "eval_runtime": 1.66, "eval_samples_per_second": 240.365, "eval_steps_per_second": 30.121, "step": 1098 }, { "epoch": 10.0, "grad_norm": 0.006534805987030268, "learning_rate": 2.5e-05, "loss": 0.0109, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8744522298370696, "eval_loss": 0.7575166821479797, "eval_precision": 0.8795731707317074, "eval_recall": 0.8697945080923805, "eval_runtime": 1.6781, "eval_samples_per_second": 237.769, "eval_steps_per_second": 29.796, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.00184684619307518, "learning_rate": 2.25e-05, "loss": 0.0068, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8861029031685659, "eval_loss": 0.7536790370941162, "eval_precision": 0.8937558980811576, "eval_recall": 0.879387161302055, "eval_runtime": 1.6764, "eval_samples_per_second": 238.006, "eval_steps_per_second": 29.825, "step": 1342 }, { "epoch": 12.0, "grad_norm": 0.01189060416072607, "learning_rate": 2e-05, "loss": 0.0131, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8775533117267087, "eval_loss": 0.7247006297111511, "eval_precision": 0.873246730188791, "eval_recall": 0.8822967812329514, "eval_runtime": 1.6752, "eval_samples_per_second": 238.178, "eval_steps_per_second": 29.847, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.0015333497431129217, "learning_rate": 1.75e-05, "loss": 0.0101, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8763538792940554, "eval_loss": 0.7927835583686829, "eval_precision": 0.8754297605404427, "eval_recall": 0.877295871976723, "eval_runtime": 1.6839, "eval_samples_per_second": 236.944, "eval_steps_per_second": 29.692, "step": 1586 }, { "epoch": 14.0, "grad_norm": 0.0037907068617641926, "learning_rate": 1.5e-05, "loss": 0.0061, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8884169154604891, "eval_loss": 0.784883975982666, "eval_precision": 0.8874630556728391, "eval_recall": 0.8893889798145117, "eval_runtime": 1.6745, "eval_samples_per_second": 238.284, "eval_steps_per_second": 29.86, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.0031544596422463655, "learning_rate": 1.25e-05, "loss": 0.0135, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8731122745782431, "eval_loss": 0.781574010848999, "eval_precision": 0.8829705994654449, "eval_recall": 0.864793598836152, "eval_runtime": 1.6721, "eval_samples_per_second": 238.624, "eval_steps_per_second": 29.903, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.0015803646529093385, "learning_rate": 1e-05, "loss": 0.0081, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8757339815412664, "eval_loss": 0.7727145552635193, "eval_precision": 0.8766906299500427, "eval_recall": 0.8747954173486088, "eval_runtime": 1.6771, "eval_samples_per_second": 237.909, "eval_steps_per_second": 29.813, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.00223415601067245, "learning_rate": 7.5e-06, "loss": 0.0027, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8763538792940554, "eval_loss": 0.81281578540802, "eval_precision": 0.8754297605404427, "eval_recall": 0.877295871976723, "eval_runtime": 1.6614, "eval_samples_per_second": 240.156, "eval_steps_per_second": 30.095, "step": 2074 }, { "epoch": 18.0, "grad_norm": 0.0023393542505800724, "learning_rate": 5e-06, "loss": 0.0041, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8817957385392532, "eval_loss": 0.8081415891647339, "eval_precision": 0.8827677592299257, "eval_recall": 0.8808419712675032, "eval_runtime": 1.6587, "eval_samples_per_second": 240.553, "eval_steps_per_second": 30.145, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.0010473760776221752, "learning_rate": 2.5e-06, "loss": 0.0018, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8793019197207679, "eval_loss": 0.8038576245307922, "eval_precision": 0.8893184421534936, "eval_recall": 0.8708401527550463, "eval_runtime": 1.6566, "eval_samples_per_second": 240.849, "eval_steps_per_second": 30.182, "step": 2318 }, { "epoch": 20.0, "grad_norm": 0.0015798051608726382, "learning_rate": 0.0, "loss": 0.0025, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8799463033398397, "eval_loss": 0.803022563457489, "eval_precision": 0.8874803397294746, "eval_recall": 0.8733406073831607, "eval_runtime": 1.6611, "eval_samples_per_second": 240.198, "eval_steps_per_second": 30.1, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7584162436176000.0, "train_loss": 0.05501617935226589, "train_runtime": 867.5592, "train_samples_per_second": 83.867, "train_steps_per_second": 2.812 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7584162436176000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }