nerugm-lora-r8-4 / trainer_state.json
apwic's picture
End of training
d17af2b verified
raw
history blame
10.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.4620718955993652,
"learning_rate": 4.75e-05,
"loss": 1.2537,
"step": 106
},
{
"epoch": 1.0,
"eval_accuracy": 0.8365739601279842,
"eval_f1": 0.0,
"eval_loss": 0.7370312809944153,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 1.0987,
"eval_samples_per_second": 170.195,
"eval_steps_per_second": 2.73,
"step": 106
},
{
"epoch": 2.0,
"grad_norm": 3.4634525775909424,
"learning_rate": 4.5e-05,
"loss": 0.7093,
"step": 212
},
{
"epoch": 2.0,
"eval_accuracy": 0.8373123307900566,
"eval_f1": 0.0056022408963585435,
"eval_loss": 0.6298087239265442,
"eval_precision": 0.16666666666666666,
"eval_recall": 0.002849002849002849,
"eval_runtime": 0.9814,
"eval_samples_per_second": 190.541,
"eval_steps_per_second": 3.057,
"step": 212
},
{
"epoch": 3.0,
"grad_norm": 1.2972607612609863,
"learning_rate": 4.25e-05,
"loss": 0.6232,
"step": 318
},
{
"epoch": 3.0,
"eval_accuracy": 0.8417425547624908,
"eval_f1": 0.032171581769437,
"eval_loss": 0.5443252325057983,
"eval_precision": 0.2727272727272727,
"eval_recall": 0.017094017094017096,
"eval_runtime": 1.0287,
"eval_samples_per_second": 181.785,
"eval_steps_per_second": 2.916,
"step": 318
},
{
"epoch": 4.0,
"grad_norm": 1.2424334287643433,
"learning_rate": 4e-05,
"loss": 0.5363,
"step": 424
},
{
"epoch": 4.0,
"eval_accuracy": 0.8624169333005168,
"eval_f1": 0.14977973568281938,
"eval_loss": 0.45594143867492676,
"eval_precision": 0.3300970873786408,
"eval_recall": 0.09686609686609686,
"eval_runtime": 0.9669,
"eval_samples_per_second": 193.397,
"eval_steps_per_second": 3.103,
"step": 424
},
{
"epoch": 5.0,
"grad_norm": 1.3577224016189575,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4591,
"step": 530
},
{
"epoch": 5.0,
"eval_accuracy": 0.8929362539995077,
"eval_f1": 0.3604240282685513,
"eval_loss": 0.3863191306591034,
"eval_precision": 0.4744186046511628,
"eval_recall": 0.2905982905982906,
"eval_runtime": 0.9477,
"eval_samples_per_second": 197.315,
"eval_steps_per_second": 3.165,
"step": 530
},
{
"epoch": 6.0,
"grad_norm": 1.2191588878631592,
"learning_rate": 3.5e-05,
"loss": 0.387,
"step": 636
},
{
"epoch": 6.0,
"eval_accuracy": 0.9190253507260645,
"eval_f1": 0.5714285714285715,
"eval_loss": 0.32724133133888245,
"eval_precision": 0.5789473684210527,
"eval_recall": 0.5641025641025641,
"eval_runtime": 0.9315,
"eval_samples_per_second": 200.744,
"eval_steps_per_second": 3.22,
"step": 636
},
{
"epoch": 7.0,
"grad_norm": 1.1846706867218018,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.3252,
"step": 742
},
{
"epoch": 7.0,
"eval_accuracy": 0.9291164164410534,
"eval_f1": 0.6436170212765958,
"eval_loss": 0.2810536026954651,
"eval_precision": 0.6034912718204489,
"eval_recall": 0.6894586894586895,
"eval_runtime": 0.9504,
"eval_samples_per_second": 196.764,
"eval_steps_per_second": 3.157,
"step": 742
},
{
"epoch": 8.0,
"grad_norm": 1.0833159685134888,
"learning_rate": 3e-05,
"loss": 0.2874,
"step": 848
},
{
"epoch": 8.0,
"eval_accuracy": 0.9313315284272705,
"eval_f1": 0.655217965653897,
"eval_loss": 0.24546079337596893,
"eval_precision": 0.6108374384236454,
"eval_recall": 0.7065527065527065,
"eval_runtime": 0.9612,
"eval_samples_per_second": 194.541,
"eval_steps_per_second": 3.121,
"step": 848
},
{
"epoch": 9.0,
"grad_norm": 1.9407267570495605,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2588,
"step": 954
},
{
"epoch": 9.0,
"eval_accuracy": 0.9333005168594635,
"eval_f1": 0.6761290322580644,
"eval_loss": 0.22847984731197357,
"eval_precision": 0.6179245283018868,
"eval_recall": 0.7464387464387464,
"eval_runtime": 0.9716,
"eval_samples_per_second": 192.466,
"eval_steps_per_second": 3.088,
"step": 954
},
{
"epoch": 10.0,
"grad_norm": 0.7366420030593872,
"learning_rate": 2.5e-05,
"loss": 0.2393,
"step": 1060
},
{
"epoch": 10.0,
"eval_accuracy": 0.9362539995077529,
"eval_f1": 0.6975546975546976,
"eval_loss": 0.21532489359378815,
"eval_precision": 0.636150234741784,
"eval_recall": 0.7720797720797721,
"eval_runtime": 0.9655,
"eval_samples_per_second": 193.678,
"eval_steps_per_second": 3.107,
"step": 1060
},
{
"epoch": 11.0,
"grad_norm": 1.1416951417922974,
"learning_rate": 2.25e-05,
"loss": 0.224,
"step": 1166
},
{
"epoch": 11.0,
"eval_accuracy": 0.9387152350479941,
"eval_f1": 0.7030456852791879,
"eval_loss": 0.2062499076128006,
"eval_precision": 0.6338672768878718,
"eval_recall": 0.7891737891737892,
"eval_runtime": 0.9619,
"eval_samples_per_second": 194.401,
"eval_steps_per_second": 3.119,
"step": 1166
},
{
"epoch": 12.0,
"grad_norm": 1.7414947748184204,
"learning_rate": 2e-05,
"loss": 0.2137,
"step": 1272
},
{
"epoch": 12.0,
"eval_accuracy": 0.9387152350479941,
"eval_f1": 0.7135549872122762,
"eval_loss": 0.20024912059307098,
"eval_precision": 0.6473317865429234,
"eval_recall": 0.7948717948717948,
"eval_runtime": 0.9544,
"eval_samples_per_second": 195.928,
"eval_steps_per_second": 3.143,
"step": 1272
},
{
"epoch": 13.0,
"grad_norm": 1.186489224433899,
"learning_rate": 1.75e-05,
"loss": 0.2052,
"step": 1378
},
{
"epoch": 13.0,
"eval_accuracy": 0.9424070883583558,
"eval_f1": 0.7218628719275549,
"eval_loss": 0.18892288208007812,
"eval_precision": 0.6611374407582938,
"eval_recall": 0.7948717948717948,
"eval_runtime": 0.9502,
"eval_samples_per_second": 196.793,
"eval_steps_per_second": 3.157,
"step": 1378
},
{
"epoch": 14.0,
"grad_norm": 1.4163442850112915,
"learning_rate": 1.5e-05,
"loss": 0.2039,
"step": 1484
},
{
"epoch": 14.0,
"eval_accuracy": 0.9431454590204282,
"eval_f1": 0.7312661498708009,
"eval_loss": 0.18623687326908112,
"eval_precision": 0.6690307328605201,
"eval_recall": 0.8062678062678063,
"eval_runtime": 0.9553,
"eval_samples_per_second": 195.753,
"eval_steps_per_second": 3.14,
"step": 1484
},
{
"epoch": 15.0,
"grad_norm": 1.5289697647094727,
"learning_rate": 1.25e-05,
"loss": 0.1975,
"step": 1590
},
{
"epoch": 15.0,
"eval_accuracy": 0.9431454590204282,
"eval_f1": 0.7319587628865979,
"eval_loss": 0.1867983341217041,
"eval_precision": 0.668235294117647,
"eval_recall": 0.8091168091168092,
"eval_runtime": 0.9597,
"eval_samples_per_second": 194.853,
"eval_steps_per_second": 3.126,
"step": 1590
},
{
"epoch": 16.0,
"grad_norm": 2.371168375015259,
"learning_rate": 1e-05,
"loss": 0.1936,
"step": 1696
},
{
"epoch": 16.0,
"eval_accuracy": 0.94265321191238,
"eval_f1": 0.7321428571428572,
"eval_loss": 0.18374690413475037,
"eval_precision": 0.6628175519630485,
"eval_recall": 0.8176638176638177,
"eval_runtime": 0.9485,
"eval_samples_per_second": 197.152,
"eval_steps_per_second": 3.163,
"step": 1696
},
{
"epoch": 17.0,
"grad_norm": 0.7330523133277893,
"learning_rate": 7.5e-06,
"loss": 0.1908,
"step": 1802
},
{
"epoch": 17.0,
"eval_accuracy": 0.94265321191238,
"eval_f1": 0.7305236270753512,
"eval_loss": 0.18250302970409393,
"eval_precision": 0.6620370370370371,
"eval_recall": 0.8148148148148148,
"eval_runtime": 0.9449,
"eval_samples_per_second": 197.907,
"eval_steps_per_second": 3.175,
"step": 1802
},
{
"epoch": 18.0,
"grad_norm": 1.4619590044021606,
"learning_rate": 5e-06,
"loss": 0.1885,
"step": 1908
},
{
"epoch": 18.0,
"eval_accuracy": 0.9431454590204282,
"eval_f1": 0.7270408163265305,
"eval_loss": 0.1805543154478073,
"eval_precision": 0.6581986143187067,
"eval_recall": 0.811965811965812,
"eval_runtime": 0.95,
"eval_samples_per_second": 196.84,
"eval_steps_per_second": 3.158,
"step": 1908
},
{
"epoch": 19.0,
"grad_norm": 0.7237643003463745,
"learning_rate": 2.5e-06,
"loss": 0.1877,
"step": 2014
},
{
"epoch": 19.0,
"eval_accuracy": 0.9431454590204282,
"eval_f1": 0.7247119078104994,
"eval_loss": 0.17826727032661438,
"eval_precision": 0.6581395348837209,
"eval_recall": 0.8062678062678063,
"eval_runtime": 0.9528,
"eval_samples_per_second": 196.268,
"eval_steps_per_second": 3.149,
"step": 2014
},
{
"epoch": 20.0,
"grad_norm": 2.3640408515930176,
"learning_rate": 0.0,
"loss": 0.1858,
"step": 2120
},
{
"epoch": 20.0,
"eval_accuracy": 0.9433915825744523,
"eval_f1": 0.7279693486590039,
"eval_loss": 0.1783868670463562,
"eval_precision": 0.6597222222222222,
"eval_recall": 0.811965811965812,
"eval_runtime": 0.9505,
"eval_samples_per_second": 196.741,
"eval_steps_per_second": 3.156,
"step": 2120
},
{
"epoch": 20.0,
"step": 2120,
"total_flos": 907028676246000.0,
"train_loss": 0.35348991987840184,
"train_runtime": 247.3863,
"train_samples_per_second": 136.467,
"train_steps_per_second": 8.57
}
],
"logging_steps": 500,
"max_steps": 2120,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 907028676246000.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}