{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.6468505859375, "learning_rate": 4.75e-05, "loss": 0.5544, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7218045112781954, "eval_f1": 0.6745776909116292, "eval_loss": 0.5251643657684326, "eval_precision": 0.6704603946239633, "eval_recall": 0.6806692125841063, "eval_runtime": 5.1543, "eval_samples_per_second": 77.411, "eval_steps_per_second": 9.701, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.0383548736572266, "learning_rate": 4.5e-05, "loss": 0.4974, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7844611528822055, "eval_f1": 0.6839633068081344, "eval_loss": 0.4520864188671112, "eval_precision": 0.7792397660818713, "eval_recall": 0.6649845426441171, "eval_runtime": 5.0577, "eval_samples_per_second": 78.89, "eval_steps_per_second": 9.886, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.7733473777771, "learning_rate": 4.25e-05, "loss": 0.4206, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8295739348370927, "eval_f1": 0.782902317244911, "eval_loss": 0.36416465044021606, "eval_precision": 0.8034409776746092, "eval_recall": 0.7694126204764502, "eval_runtime": 5.0665, "eval_samples_per_second": 78.752, "eval_steps_per_second": 9.869, "step": 366 }, { "epoch": 4.0, "grad_norm": 1.6582493782043457, "learning_rate": 4e-05, "loss": 0.3657, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8122237052238519, "eval_loss": 0.33759891986846924, "eval_precision": 0.8184491978609625, "eval_recall": 0.8068285142753229, "eval_runtime": 5.0866, "eval_samples_per_second": 78.442, "eval_steps_per_second": 9.83, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.0865817070007324, "learning_rate": 3.7500000000000003e-05, "loss": 0.3188, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8167483159828537, "eval_loss": 0.3197535574436188, "eval_precision": 0.8201621387462095, "eval_recall": 0.8136024731769412, "eval_runtime": 5.1213, "eval_samples_per_second": 77.91, "eval_steps_per_second": 9.763, "step": 610 }, { "epoch": 6.0, "grad_norm": 3.176858425140381, "learning_rate": 3.5e-05, "loss": 0.3069, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8255172205802521, "eval_loss": 0.3127439022064209, "eval_precision": 0.8239495798319327, "eval_recall": 0.8271503909801782, "eval_runtime": 5.0741, "eval_samples_per_second": 78.635, "eval_steps_per_second": 9.854, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.9114988446235657, "learning_rate": 3.2500000000000004e-05, "loss": 0.2838, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8359744037230948, "eval_loss": 0.30525630712509155, "eval_precision": 0.8448835433371515, "eval_recall": 0.828514275322786, "eval_runtime": 5.0555, "eval_samples_per_second": 78.924, "eval_steps_per_second": 9.89, "step": 854 }, { "epoch": 8.0, "grad_norm": 5.085402965545654, "learning_rate": 3e-05, "loss": 0.2699, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8403693509153758, "eval_loss": 0.2976122498512268, "eval_precision": 0.8647333925035843, "eval_recall": 0.8238316057464994, "eval_runtime": 5.0901, "eval_samples_per_second": 78.388, "eval_steps_per_second": 9.823, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.7740559577941895, "learning_rate": 2.7500000000000004e-05, "loss": 0.2614, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8342105263157895, "eval_loss": 0.2887146770954132, "eval_precision": 0.8398085585585586, "eval_recall": 0.82924168030551, "eval_runtime": 5.0975, "eval_samples_per_second": 78.274, "eval_steps_per_second": 9.809, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.8042409420013428, "learning_rate": 2.5e-05, "loss": 0.2515, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8298403801632752, "eval_loss": 0.2852274477481842, "eval_precision": 0.8315523576240049, "eval_recall": 0.8281960356428442, "eval_runtime": 5.0847, "eval_samples_per_second": 78.471, "eval_steps_per_second": 9.833, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.159557819366455, "learning_rate": 2.25e-05, "loss": 0.2453, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.844327731092437, "eval_loss": 0.2800367474555969, "eval_precision": 0.8411320530352577, "eval_recall": 0.8477905073649754, "eval_runtime": 5.0685, "eval_samples_per_second": 78.721, "eval_steps_per_second": 9.865, "step": 1342 }, { "epoch": 12.0, "grad_norm": 1.643934965133667, "learning_rate": 2e-05, "loss": 0.236, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8502252252252251, "eval_loss": 0.27178239822387695, "eval_precision": 0.863265306122449, "eval_recall": 0.8398799781778505, "eval_runtime": 5.0641, "eval_samples_per_second": 78.79, "eval_steps_per_second": 9.873, "step": 1464 }, { "epoch": 13.0, "grad_norm": 2.2930312156677246, "learning_rate": 1.75e-05, "loss": 0.227, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8541488972828073, "eval_loss": 0.2711658477783203, "eval_precision": 0.8559859154929578, "eval_recall": 0.8523822513184216, "eval_runtime": 5.1106, "eval_samples_per_second": 78.073, "eval_steps_per_second": 9.784, "step": 1586 }, { "epoch": 14.0, "grad_norm": 3.1738922595977783, "learning_rate": 1.5e-05, "loss": 0.227, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8478885630498534, "eval_loss": 0.27570030093193054, "eval_precision": 0.8385980403326877, "eval_recall": 0.8602927805055465, "eval_runtime": 5.1226, "eval_samples_per_second": 77.891, "eval_steps_per_second": 9.761, "step": 1708 }, { "epoch": 15.0, "grad_norm": 2.479130506515503, "learning_rate": 1.25e-05, "loss": 0.2171, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8622085718274466, "eval_loss": 0.27079105377197266, "eval_precision": 0.8530168716042322, "eval_recall": 0.8741589379887251, "eval_runtime": 5.0709, "eval_samples_per_second": 78.684, "eval_steps_per_second": 9.86, "step": 1830 }, { "epoch": 16.0, "grad_norm": 2.9859211444854736, "learning_rate": 1e-05, "loss": 0.214, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8629148629148629, "eval_loss": 0.2631725072860718, "eval_precision": 0.8657894736842104, "eval_recall": 0.860201854882706, "eval_runtime": 5.0662, "eval_samples_per_second": 78.757, "eval_steps_per_second": 9.869, "step": 1952 }, { "epoch": 17.0, "grad_norm": 4.846933364868164, "learning_rate": 7.5e-06, "loss": 0.2124, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.858259325044405, "eval_loss": 0.26394030451774597, "eval_precision": 0.8573798178418481, "eval_recall": 0.8591562102200401, "eval_runtime": 5.0705, "eval_samples_per_second": 78.691, "eval_steps_per_second": 9.861, "step": 2074 }, { "epoch": 18.0, "grad_norm": 6.029839515686035, "learning_rate": 5e-06, "loss": 0.2166, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8636104675452922, "eval_loss": 0.2631914019584656, "eval_precision": 0.8645363713902765, "eval_recall": 0.8627023095108202, "eval_runtime": 5.0927, "eval_samples_per_second": 78.348, "eval_steps_per_second": 9.818, "step": 2196 }, { "epoch": 19.0, "grad_norm": 2.059579372406006, "learning_rate": 2.5e-06, "loss": 0.2086, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8575487105473052, "eval_loss": 0.2630336880683899, "eval_precision": 0.8584592421103936, "eval_recall": 0.8566557555919259, "eval_runtime": 5.1111, "eval_samples_per_second": 78.066, "eval_steps_per_second": 9.783, "step": 2318 }, { "epoch": 20.0, "grad_norm": 2.347975254058838, "learning_rate": 0.0, "loss": 0.2113, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8629148629148629, "eval_loss": 0.26311013102531433, "eval_precision": 0.8657894736842104, "eval_recall": 0.860201854882706, "eval_runtime": 5.0704, "eval_samples_per_second": 78.692, "eval_steps_per_second": 9.861, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7650353185560000.0, "train_loss": 0.28728070728114397, "train_runtime": 1942.6788, "train_samples_per_second": 37.526, "train_steps_per_second": 1.256 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7650353185560000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }