{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.483087062835693, "learning_rate": 4.75e-05, "loss": 0.5384, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7368421052631579, "eval_f1": 0.6617470147024391, "eval_loss": 0.49190446734428406, "eval_precision": 0.676996860830136, "eval_recall": 0.6538006910347336, "eval_runtime": 1.8289, "eval_samples_per_second": 218.165, "eval_steps_per_second": 27.339, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.5009255409240723, "learning_rate": 4.5e-05, "loss": 0.4212, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8245614035087719, "eval_f1": 0.804783605457392, "eval_loss": 0.41747915744781494, "eval_precision": 0.7930195663670017, "eval_recall": 0.8358792507728678, "eval_runtime": 1.8239, "eval_samples_per_second": 218.767, "eval_steps_per_second": 27.414, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.475321054458618, "learning_rate": 4.25e-05, "loss": 0.3413, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8008725610148754, "eval_loss": 0.34032315015792847, "eval_precision": 0.825657894736842, "eval_recall": 0.7850518276050191, "eval_runtime": 1.8225, "eval_samples_per_second": 218.935, "eval_steps_per_second": 27.436, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.4994115829467773, "learning_rate": 4e-05, "loss": 0.2888, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8151164115613962, "eval_loss": 0.3277858793735504, "eval_precision": 0.8159193371512123, "eval_recall": 0.8143298781596654, "eval_runtime": 1.824, "eval_samples_per_second": 218.745, "eval_steps_per_second": 27.412, "step": 488 }, { "epoch": 5.0, "grad_norm": 1.4922258853912354, "learning_rate": 3.7500000000000003e-05, "loss": 0.2577, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8289650949173301, "eval_loss": 0.3102515935897827, "eval_precision": 0.8325081997648369, "eval_recall": 0.82569558101473, "eval_runtime": 1.824, "eval_samples_per_second": 218.754, "eval_steps_per_second": 27.413, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.708418846130371, "learning_rate": 3.5e-05, "loss": 0.2495, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8368501045387564, "eval_loss": 0.30735519528388977, "eval_precision": 0.8435514312676942, "eval_recall": 0.8310147299509002, "eval_runtime": 1.8256, "eval_samples_per_second": 218.558, "eval_steps_per_second": 27.388, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.44828805327415466, "learning_rate": 3.2500000000000004e-05, "loss": 0.2391, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8393634395533442, "eval_loss": 0.3005428612232208, "eval_precision": 0.8402278542707444, "eval_recall": 0.8385160938352427, "eval_runtime": 1.8233, "eval_samples_per_second": 218.828, "eval_steps_per_second": 27.422, "step": 854 }, { "epoch": 8.0, "grad_norm": 2.012701988220215, "learning_rate": 3e-05, "loss": 0.2177, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8411818738518064, "eval_loss": 0.29794931411743164, "eval_precision": 0.8448542607834644, "eval_recall": 0.8377886888525186, "eval_runtime": 1.822, "eval_samples_per_second": 218.994, "eval_steps_per_second": 27.443, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.480318069458008, "learning_rate": 2.7500000000000004e-05, "loss": 0.2102, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8548827059465357, "eval_loss": 0.2961363196372986, "eval_precision": 0.8548827059465357, "eval_recall": 0.8548827059465357, "eval_runtime": 1.8216, "eval_samples_per_second": 219.038, "eval_steps_per_second": 27.448, "step": 1098 }, { "epoch": 10.0, "grad_norm": 5.2318220138549805, "learning_rate": 2.5e-05, "loss": 0.2029, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8339841249519908, "eval_loss": 0.3042662441730499, "eval_precision": 0.8579231241892538, "eval_recall": 0.8177850518276051, "eval_runtime": 1.8231, "eval_samples_per_second": 218.855, "eval_steps_per_second": 27.425, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.4476318061351776, "learning_rate": 2.25e-05, "loss": 0.1829, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8533986527862829, "eval_loss": 0.30592578649520874, "eval_precision": 0.8572003218020917, "eval_recall": 0.8498817966903074, "eval_runtime": 1.8223, "eval_samples_per_second": 218.951, "eval_steps_per_second": 27.437, "step": 1342 }, { "epoch": 12.0, "grad_norm": 6.3299665451049805, "learning_rate": 2e-05, "loss": 0.184, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.846679266293906, "eval_loss": 0.3001992702484131, "eval_precision": 0.8609191655801824, "eval_recall": 0.8356064739043463, "eval_runtime": 1.821, "eval_samples_per_second": 219.105, "eval_steps_per_second": 27.457, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.40616801381111145, "learning_rate": 1.75e-05, "loss": 0.1802, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8649122807017544, "eval_loss": 0.29539263248443604, "eval_precision": 0.8710116366366366, "eval_recall": 0.8594744498999818, "eval_runtime": 1.8246, "eval_samples_per_second": 218.68, "eval_steps_per_second": 27.404, "step": 1586 }, { "epoch": 14.0, "grad_norm": 4.2211737632751465, "learning_rate": 1.5e-05, "loss": 0.1684, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8642908431276217, "eval_loss": 0.3007645606994629, "eval_precision": 0.8633964654080464, "eval_recall": 0.8652027641389344, "eval_runtime": 1.8212, "eval_samples_per_second": 219.082, "eval_steps_per_second": 27.454, "step": 1708 }, { "epoch": 15.0, "grad_norm": 2.8058817386627197, "learning_rate": 1.25e-05, "loss": 0.1627, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8622036668943447, "eval_loss": 0.3066510558128357, "eval_precision": 0.8671602787456446, "eval_recall": 0.8577014002545917, "eval_runtime": 1.8221, "eval_samples_per_second": 218.981, "eval_steps_per_second": 27.441, "step": 1830 }, { "epoch": 16.0, "grad_norm": 1.3792524337768555, "learning_rate": 1e-05, "loss": 0.1581, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8522278069611882, "eval_loss": 0.31070253252983093, "eval_precision": 0.8513631702756499, "eval_recall": 0.8531096563011457, "eval_runtime": 1.8214, "eval_samples_per_second": 219.068, "eval_steps_per_second": 27.452, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.5356613397598267, "learning_rate": 7.5e-06, "loss": 0.1468, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8483536940081443, "eval_loss": 0.3229115903377533, "eval_precision": 0.8575792287132493, "eval_recall": 0.8406073831605747, "eval_runtime": 1.8217, "eval_samples_per_second": 219.021, "eval_steps_per_second": 27.446, "step": 2074 }, { "epoch": 18.0, "grad_norm": 2.944960832595825, "learning_rate": 5e-06, "loss": 0.1433, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8456742372671576, "eval_loss": 0.32474786043167114, "eval_precision": 0.8536697247706422, "eval_recall": 0.8388343335151845, "eval_runtime": 1.8221, "eval_samples_per_second": 218.981, "eval_steps_per_second": 27.441, "step": 2196 }, { "epoch": 19.0, "grad_norm": 4.548107147216797, "learning_rate": 2.5e-06, "loss": 0.1538, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8456742372671576, "eval_loss": 0.324598103761673, "eval_precision": 0.8536697247706422, "eval_recall": 0.8388343335151845, "eval_runtime": 1.8228, "eval_samples_per_second": 218.896, "eval_steps_per_second": 27.431, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.463918924331665, "learning_rate": 0.0, "loss": 0.1412, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8456742372671576, "eval_loss": 0.32347819209098816, "eval_precision": 0.8536697247706422, "eval_recall": 0.8388343335151845, "eval_runtime": 1.8216, "eval_samples_per_second": 219.041, "eval_steps_per_second": 27.449, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.2294086073265701, "train_runtime": 651.9313, "train_samples_per_second": 111.607, "train_steps_per_second": 3.743 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }