|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.6468505859375, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5544, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7218045112781954, |
|
"eval_f1": 0.6745776909116292, |
|
"eval_loss": 0.5251643657684326, |
|
"eval_precision": 0.6704603946239633, |
|
"eval_recall": 0.6806692125841063, |
|
"eval_runtime": 5.1543, |
|
"eval_samples_per_second": 77.411, |
|
"eval_steps_per_second": 9.701, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.0383548736572266, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4974, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7844611528822055, |
|
"eval_f1": 0.6839633068081344, |
|
"eval_loss": 0.4520864188671112, |
|
"eval_precision": 0.7792397660818713, |
|
"eval_recall": 0.6649845426441171, |
|
"eval_runtime": 5.0577, |
|
"eval_samples_per_second": 78.89, |
|
"eval_steps_per_second": 9.886, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.7733473777771, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4206, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.782902317244911, |
|
"eval_loss": 0.36416465044021606, |
|
"eval_precision": 0.8034409776746092, |
|
"eval_recall": 0.7694126204764502, |
|
"eval_runtime": 5.0665, |
|
"eval_samples_per_second": 78.752, |
|
"eval_steps_per_second": 9.869, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.6582493782043457, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3657, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8122237052238519, |
|
"eval_loss": 0.33759891986846924, |
|
"eval_precision": 0.8184491978609625, |
|
"eval_recall": 0.8068285142753229, |
|
"eval_runtime": 5.0866, |
|
"eval_samples_per_second": 78.442, |
|
"eval_steps_per_second": 9.83, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.0865817070007324, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3188, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8167483159828537, |
|
"eval_loss": 0.3197535574436188, |
|
"eval_precision": 0.8201621387462095, |
|
"eval_recall": 0.8136024731769412, |
|
"eval_runtime": 5.1213, |
|
"eval_samples_per_second": 77.91, |
|
"eval_steps_per_second": 9.763, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.176858425140381, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3069, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8255172205802521, |
|
"eval_loss": 0.3127439022064209, |
|
"eval_precision": 0.8239495798319327, |
|
"eval_recall": 0.8271503909801782, |
|
"eval_runtime": 5.0741, |
|
"eval_samples_per_second": 78.635, |
|
"eval_steps_per_second": 9.854, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.9114988446235657, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2838, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8359744037230948, |
|
"eval_loss": 0.30525630712509155, |
|
"eval_precision": 0.8448835433371515, |
|
"eval_recall": 0.828514275322786, |
|
"eval_runtime": 5.0555, |
|
"eval_samples_per_second": 78.924, |
|
"eval_steps_per_second": 9.89, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 5.085402965545654, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2699, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8403693509153758, |
|
"eval_loss": 0.2976122498512268, |
|
"eval_precision": 0.8647333925035843, |
|
"eval_recall": 0.8238316057464994, |
|
"eval_runtime": 5.0901, |
|
"eval_samples_per_second": 78.388, |
|
"eval_steps_per_second": 9.823, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.7740559577941895, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2614, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8342105263157895, |
|
"eval_loss": 0.2887146770954132, |
|
"eval_precision": 0.8398085585585586, |
|
"eval_recall": 0.82924168030551, |
|
"eval_runtime": 5.0975, |
|
"eval_samples_per_second": 78.274, |
|
"eval_steps_per_second": 9.809, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.8042409420013428, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2515, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8298403801632752, |
|
"eval_loss": 0.2852274477481842, |
|
"eval_precision": 0.8315523576240049, |
|
"eval_recall": 0.8281960356428442, |
|
"eval_runtime": 5.0847, |
|
"eval_samples_per_second": 78.471, |
|
"eval_steps_per_second": 9.833, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.159557819366455, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2453, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.844327731092437, |
|
"eval_loss": 0.2800367474555969, |
|
"eval_precision": 0.8411320530352577, |
|
"eval_recall": 0.8477905073649754, |
|
"eval_runtime": 5.0685, |
|
"eval_samples_per_second": 78.721, |
|
"eval_steps_per_second": 9.865, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.643934965133667, |
|
"learning_rate": 2e-05, |
|
"loss": 0.236, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8502252252252251, |
|
"eval_loss": 0.27178239822387695, |
|
"eval_precision": 0.863265306122449, |
|
"eval_recall": 0.8398799781778505, |
|
"eval_runtime": 5.0641, |
|
"eval_samples_per_second": 78.79, |
|
"eval_steps_per_second": 9.873, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 2.2930312156677246, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.227, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8541488972828073, |
|
"eval_loss": 0.2711658477783203, |
|
"eval_precision": 0.8559859154929578, |
|
"eval_recall": 0.8523822513184216, |
|
"eval_runtime": 5.1106, |
|
"eval_samples_per_second": 78.073, |
|
"eval_steps_per_second": 9.784, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 3.1738922595977783, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.227, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8478885630498534, |
|
"eval_loss": 0.27570030093193054, |
|
"eval_precision": 0.8385980403326877, |
|
"eval_recall": 0.8602927805055465, |
|
"eval_runtime": 5.1226, |
|
"eval_samples_per_second": 77.891, |
|
"eval_steps_per_second": 9.761, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 2.479130506515503, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2171, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8622085718274466, |
|
"eval_loss": 0.27079105377197266, |
|
"eval_precision": 0.8530168716042322, |
|
"eval_recall": 0.8741589379887251, |
|
"eval_runtime": 5.0709, |
|
"eval_samples_per_second": 78.684, |
|
"eval_steps_per_second": 9.86, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.9859211444854736, |
|
"learning_rate": 1e-05, |
|
"loss": 0.214, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8629148629148629, |
|
"eval_loss": 0.2631725072860718, |
|
"eval_precision": 0.8657894736842104, |
|
"eval_recall": 0.860201854882706, |
|
"eval_runtime": 5.0662, |
|
"eval_samples_per_second": 78.757, |
|
"eval_steps_per_second": 9.869, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 4.846933364868164, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2124, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.858259325044405, |
|
"eval_loss": 0.26394030451774597, |
|
"eval_precision": 0.8573798178418481, |
|
"eval_recall": 0.8591562102200401, |
|
"eval_runtime": 5.0705, |
|
"eval_samples_per_second": 78.691, |
|
"eval_steps_per_second": 9.861, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 6.029839515686035, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2166, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8636104675452922, |
|
"eval_loss": 0.2631914019584656, |
|
"eval_precision": 0.8645363713902765, |
|
"eval_recall": 0.8627023095108202, |
|
"eval_runtime": 5.0927, |
|
"eval_samples_per_second": 78.348, |
|
"eval_steps_per_second": 9.818, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 2.059579372406006, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2086, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8575487105473052, |
|
"eval_loss": 0.2630336880683899, |
|
"eval_precision": 0.8584592421103936, |
|
"eval_recall": 0.8566557555919259, |
|
"eval_runtime": 5.1111, |
|
"eval_samples_per_second": 78.066, |
|
"eval_steps_per_second": 9.783, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.347975254058838, |
|
"learning_rate": 0.0, |
|
"loss": 0.2113, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8629148629148629, |
|
"eval_loss": 0.26311013102531433, |
|
"eval_precision": 0.8657894736842104, |
|
"eval_recall": 0.860201854882706, |
|
"eval_runtime": 5.0704, |
|
"eval_samples_per_second": 78.692, |
|
"eval_steps_per_second": 9.861, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7650353185560000.0, |
|
"train_loss": 0.28728070728114397, |
|
"train_runtime": 1942.6788, |
|
"train_samples_per_second": 37.526, |
|
"train_steps_per_second": 1.256 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7650353185560000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|