sentiment-lora-r16 / trainer_state.json
apwic's picture
End of training
4244da6 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 3.6468505859375,
"learning_rate": 4.75e-05,
"loss": 0.5544,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7218045112781954,
"eval_f1": 0.6745776909116292,
"eval_loss": 0.5251643657684326,
"eval_precision": 0.6704603946239633,
"eval_recall": 0.6806692125841063,
"eval_runtime": 5.1543,
"eval_samples_per_second": 77.411,
"eval_steps_per_second": 9.701,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 3.0383548736572266,
"learning_rate": 4.5e-05,
"loss": 0.4974,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7844611528822055,
"eval_f1": 0.6839633068081344,
"eval_loss": 0.4520864188671112,
"eval_precision": 0.7792397660818713,
"eval_recall": 0.6649845426441171,
"eval_runtime": 5.0577,
"eval_samples_per_second": 78.89,
"eval_steps_per_second": 9.886,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 4.7733473777771,
"learning_rate": 4.25e-05,
"loss": 0.4206,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.8295739348370927,
"eval_f1": 0.782902317244911,
"eval_loss": 0.36416465044021606,
"eval_precision": 0.8034409776746092,
"eval_recall": 0.7694126204764502,
"eval_runtime": 5.0665,
"eval_samples_per_second": 78.752,
"eval_steps_per_second": 9.869,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 1.6582493782043457,
"learning_rate": 4e-05,
"loss": 0.3657,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8122237052238519,
"eval_loss": 0.33759891986846924,
"eval_precision": 0.8184491978609625,
"eval_recall": 0.8068285142753229,
"eval_runtime": 5.0866,
"eval_samples_per_second": 78.442,
"eval_steps_per_second": 9.83,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 3.0865817070007324,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.3188,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.8167483159828537,
"eval_loss": 0.3197535574436188,
"eval_precision": 0.8201621387462095,
"eval_recall": 0.8136024731769412,
"eval_runtime": 5.1213,
"eval_samples_per_second": 77.91,
"eval_steps_per_second": 9.763,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 3.176858425140381,
"learning_rate": 3.5e-05,
"loss": 0.3069,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8255172205802521,
"eval_loss": 0.3127439022064209,
"eval_precision": 0.8239495798319327,
"eval_recall": 0.8271503909801782,
"eval_runtime": 5.0741,
"eval_samples_per_second": 78.635,
"eval_steps_per_second": 9.854,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 0.9114988446235657,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2838,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8671679197994987,
"eval_f1": 0.8359744037230948,
"eval_loss": 0.30525630712509155,
"eval_precision": 0.8448835433371515,
"eval_recall": 0.828514275322786,
"eval_runtime": 5.0555,
"eval_samples_per_second": 78.924,
"eval_steps_per_second": 9.89,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 5.085402965545654,
"learning_rate": 3e-05,
"loss": 0.2699,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.87468671679198,
"eval_f1": 0.8403693509153758,
"eval_loss": 0.2976122498512268,
"eval_precision": 0.8647333925035843,
"eval_recall": 0.8238316057464994,
"eval_runtime": 5.0901,
"eval_samples_per_second": 78.388,
"eval_steps_per_second": 9.823,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 4.7740559577941895,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2614,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8342105263157895,
"eval_loss": 0.2887146770954132,
"eval_precision": 0.8398085585585586,
"eval_recall": 0.82924168030551,
"eval_runtime": 5.0975,
"eval_samples_per_second": 78.274,
"eval_steps_per_second": 9.809,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 3.8042409420013428,
"learning_rate": 2.5e-05,
"loss": 0.2515,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8298403801632752,
"eval_loss": 0.2852274477481842,
"eval_precision": 0.8315523576240049,
"eval_recall": 0.8281960356428442,
"eval_runtime": 5.0847,
"eval_samples_per_second": 78.471,
"eval_steps_per_second": 9.833,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 2.159557819366455,
"learning_rate": 2.25e-05,
"loss": 0.2453,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.844327731092437,
"eval_loss": 0.2800367474555969,
"eval_precision": 0.8411320530352577,
"eval_recall": 0.8477905073649754,
"eval_runtime": 5.0685,
"eval_samples_per_second": 78.721,
"eval_steps_per_second": 9.865,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 1.643934965133667,
"learning_rate": 2e-05,
"loss": 0.236,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8796992481203008,
"eval_f1": 0.8502252252252251,
"eval_loss": 0.27178239822387695,
"eval_precision": 0.863265306122449,
"eval_recall": 0.8398799781778505,
"eval_runtime": 5.0641,
"eval_samples_per_second": 78.79,
"eval_steps_per_second": 9.873,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 2.2930312156677246,
"learning_rate": 1.75e-05,
"loss": 0.227,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8796992481203008,
"eval_f1": 0.8541488972828073,
"eval_loss": 0.2711658477783203,
"eval_precision": 0.8559859154929578,
"eval_recall": 0.8523822513184216,
"eval_runtime": 5.1106,
"eval_samples_per_second": 78.073,
"eval_steps_per_second": 9.784,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 3.1738922595977783,
"learning_rate": 1.5e-05,
"loss": 0.227,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.8478885630498534,
"eval_loss": 0.27570030093193054,
"eval_precision": 0.8385980403326877,
"eval_recall": 0.8602927805055465,
"eval_runtime": 5.1226,
"eval_samples_per_second": 77.891,
"eval_steps_per_second": 9.761,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 2.479130506515503,
"learning_rate": 1.25e-05,
"loss": 0.2171,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8822055137844611,
"eval_f1": 0.8622085718274466,
"eval_loss": 0.27079105377197266,
"eval_precision": 0.8530168716042322,
"eval_recall": 0.8741589379887251,
"eval_runtime": 5.0709,
"eval_samples_per_second": 78.684,
"eval_steps_per_second": 9.86,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 2.9859211444854736,
"learning_rate": 1e-05,
"loss": 0.214,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.8629148629148629,
"eval_loss": 0.2631725072860718,
"eval_precision": 0.8657894736842104,
"eval_recall": 0.860201854882706,
"eval_runtime": 5.0662,
"eval_samples_per_second": 78.757,
"eval_steps_per_second": 9.869,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 4.846933364868164,
"learning_rate": 7.5e-06,
"loss": 0.2124,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8822055137844611,
"eval_f1": 0.858259325044405,
"eval_loss": 0.26394030451774597,
"eval_precision": 0.8573798178418481,
"eval_recall": 0.8591562102200401,
"eval_runtime": 5.0705,
"eval_samples_per_second": 78.691,
"eval_steps_per_second": 9.861,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 6.029839515686035,
"learning_rate": 5e-06,
"loss": 0.2166,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.8636104675452922,
"eval_loss": 0.2631914019584656,
"eval_precision": 0.8645363713902765,
"eval_recall": 0.8627023095108202,
"eval_runtime": 5.0927,
"eval_samples_per_second": 78.348,
"eval_steps_per_second": 9.818,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 2.059579372406006,
"learning_rate": 2.5e-06,
"loss": 0.2086,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8822055137844611,
"eval_f1": 0.8575487105473052,
"eval_loss": 0.2630336880683899,
"eval_precision": 0.8584592421103936,
"eval_recall": 0.8566557555919259,
"eval_runtime": 5.1111,
"eval_samples_per_second": 78.066,
"eval_steps_per_second": 9.783,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 2.347975254058838,
"learning_rate": 0.0,
"loss": 0.2113,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.8629148629148629,
"eval_loss": 0.26311013102531433,
"eval_precision": 0.8657894736842104,
"eval_recall": 0.860201854882706,
"eval_runtime": 5.0704,
"eval_samples_per_second": 78.692,
"eval_steps_per_second": 9.861,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7650353185560000.0,
"train_loss": 0.28728070728114397,
"train_runtime": 1942.6788,
"train_samples_per_second": 37.526,
"train_steps_per_second": 1.256
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7650353185560000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}