nerugm-lora-r16-2 / trainer_state.json
apwic's picture
End of training
fbde14b verified
raw
history blame
10.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.7648299932479858,
"learning_rate": 4.75e-05,
"loss": 1.1483,
"step": 106
},
{
"epoch": 1.0,
"eval_accuracy": 0.8448836627470603,
"eval_f1": 0.0,
"eval_loss": 0.6899715065956116,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 0.8894,
"eval_samples_per_second": 210.248,
"eval_steps_per_second": 3.373,
"step": 106
},
{
"epoch": 2.0,
"grad_norm": 1.372132420539856,
"learning_rate": 4.5e-05,
"loss": 0.6875,
"step": 212
},
{
"epoch": 2.0,
"eval_accuracy": 0.8463847885914436,
"eval_f1": 0.0,
"eval_loss": 0.5737118721008301,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 0.926,
"eval_samples_per_second": 201.946,
"eval_steps_per_second": 3.24,
"step": 212
},
{
"epoch": 3.0,
"grad_norm": 1.2157526016235352,
"learning_rate": 4.25e-05,
"loss": 0.5874,
"step": 318
},
{
"epoch": 3.0,
"eval_accuracy": 0.8633975481611208,
"eval_f1": 0.10071942446043167,
"eval_loss": 0.4661254584789276,
"eval_precision": 0.2692307692307692,
"eval_recall": 0.061946902654867256,
"eval_runtime": 0.9773,
"eval_samples_per_second": 191.347,
"eval_steps_per_second": 3.07,
"step": 318
},
{
"epoch": 4.0,
"grad_norm": 2.093184232711792,
"learning_rate": 4e-05,
"loss": 0.4729,
"step": 424
},
{
"epoch": 4.0,
"eval_accuracy": 0.898173630222667,
"eval_f1": 0.37722419928825623,
"eval_loss": 0.3599094748497009,
"eval_precision": 0.47533632286995514,
"eval_recall": 0.31268436578171094,
"eval_runtime": 0.9352,
"eval_samples_per_second": 199.961,
"eval_steps_per_second": 3.208,
"step": 424
},
{
"epoch": 5.0,
"grad_norm": 4.716677665710449,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.3692,
"step": 530
},
{
"epoch": 5.0,
"eval_accuracy": 0.9246935201401051,
"eval_f1": 0.5917496443812232,
"eval_loss": 0.29404327273368835,
"eval_precision": 0.5714285714285714,
"eval_recall": 0.6135693215339233,
"eval_runtime": 0.899,
"eval_samples_per_second": 208.018,
"eval_steps_per_second": 3.337,
"step": 530
},
{
"epoch": 6.0,
"grad_norm": 1.1444728374481201,
"learning_rate": 3.5e-05,
"loss": 0.3058,
"step": 636
},
{
"epoch": 6.0,
"eval_accuracy": 0.9334500875656743,
"eval_f1": 0.6621621621621622,
"eval_loss": 0.2527407705783844,
"eval_precision": 0.6109725685785536,
"eval_recall": 0.7227138643067846,
"eval_runtime": 0.8914,
"eval_samples_per_second": 209.792,
"eval_steps_per_second": 3.366,
"step": 636
},
{
"epoch": 7.0,
"grad_norm": 0.998134434223175,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2636,
"step": 742
},
{
"epoch": 7.0,
"eval_accuracy": 0.937453089817363,
"eval_f1": 0.6954177897574124,
"eval_loss": 0.22462092339992523,
"eval_precision": 0.6401985111662531,
"eval_recall": 0.7610619469026548,
"eval_runtime": 0.9047,
"eval_samples_per_second": 206.701,
"eval_steps_per_second": 3.316,
"step": 742
},
{
"epoch": 8.0,
"grad_norm": 1.480947732925415,
"learning_rate": 3e-05,
"loss": 0.24,
"step": 848
},
{
"epoch": 8.0,
"eval_accuracy": 0.9417062797097824,
"eval_f1": 0.7241379310344828,
"eval_loss": 0.20909550786018372,
"eval_precision": 0.6578313253012048,
"eval_recall": 0.8053097345132744,
"eval_runtime": 0.9189,
"eval_samples_per_second": 203.503,
"eval_steps_per_second": 3.265,
"step": 848
},
{
"epoch": 9.0,
"grad_norm": 1.5677202939987183,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2228,
"step": 954
},
{
"epoch": 9.0,
"eval_accuracy": 0.940205153865399,
"eval_f1": 0.716883116883117,
"eval_loss": 0.1985715925693512,
"eval_precision": 0.6403712296983759,
"eval_recall": 0.8141592920353983,
"eval_runtime": 0.9225,
"eval_samples_per_second": 202.716,
"eval_steps_per_second": 3.252,
"step": 954
},
{
"epoch": 10.0,
"grad_norm": 4.242694854736328,
"learning_rate": 2.5e-05,
"loss": 0.2105,
"step": 1060
},
{
"epoch": 10.0,
"eval_accuracy": 0.9417062797097824,
"eval_f1": 0.733245729303548,
"eval_loss": 0.1821284145116806,
"eval_precision": 0.6611374407582938,
"eval_recall": 0.8230088495575221,
"eval_runtime": 0.925,
"eval_samples_per_second": 202.159,
"eval_steps_per_second": 3.243,
"step": 1060
},
{
"epoch": 11.0,
"grad_norm": 1.5337995290756226,
"learning_rate": 2.25e-05,
"loss": 0.2007,
"step": 1166
},
{
"epoch": 11.0,
"eval_accuracy": 0.9432074055541656,
"eval_f1": 0.7394736842105263,
"eval_loss": 0.17939399182796478,
"eval_precision": 0.667458432304038,
"eval_recall": 0.8289085545722714,
"eval_runtime": 0.9211,
"eval_samples_per_second": 203.028,
"eval_steps_per_second": 3.257,
"step": 1166
},
{
"epoch": 12.0,
"grad_norm": 2.7884016036987305,
"learning_rate": 2e-05,
"loss": 0.195,
"step": 1272
},
{
"epoch": 12.0,
"eval_accuracy": 0.9429572179134351,
"eval_f1": 0.7392996108949417,
"eval_loss": 0.18079817295074463,
"eval_precision": 0.6597222222222222,
"eval_recall": 0.8407079646017699,
"eval_runtime": 0.9116,
"eval_samples_per_second": 205.139,
"eval_steps_per_second": 3.291,
"step": 1272
},
{
"epoch": 13.0,
"grad_norm": 1.4956753253936768,
"learning_rate": 1.75e-05,
"loss": 0.19,
"step": 1378
},
{
"epoch": 13.0,
"eval_accuracy": 0.9459594696022017,
"eval_f1": 0.7463479415670652,
"eval_loss": 0.16896921396255493,
"eval_precision": 0.678743961352657,
"eval_recall": 0.8289085545722714,
"eval_runtime": 0.9045,
"eval_samples_per_second": 206.741,
"eval_steps_per_second": 3.317,
"step": 1378
},
{
"epoch": 14.0,
"grad_norm": 1.4634425640106201,
"learning_rate": 1.5e-05,
"loss": 0.1835,
"step": 1484
},
{
"epoch": 14.0,
"eval_accuracy": 0.9477107830873155,
"eval_f1": 0.751336898395722,
"eval_loss": 0.16314250230789185,
"eval_precision": 0.687041564792176,
"eval_recall": 0.8289085545722714,
"eval_runtime": 0.9091,
"eval_samples_per_second": 205.694,
"eval_steps_per_second": 3.3,
"step": 1484
},
{
"epoch": 15.0,
"grad_norm": 2.0593953132629395,
"learning_rate": 1.25e-05,
"loss": 0.1821,
"step": 1590
},
{
"epoch": 15.0,
"eval_accuracy": 0.9472104078058544,
"eval_f1": 0.753968253968254,
"eval_loss": 0.16711552441120148,
"eval_precision": 0.6834532374100719,
"eval_recall": 0.8407079646017699,
"eval_runtime": 0.9129,
"eval_samples_per_second": 204.846,
"eval_steps_per_second": 3.286,
"step": 1590
},
{
"epoch": 16.0,
"grad_norm": 1.1103074550628662,
"learning_rate": 1e-05,
"loss": 0.1774,
"step": 1696
},
{
"epoch": 16.0,
"eval_accuracy": 0.9472104078058544,
"eval_f1": 0.7647831800262812,
"eval_loss": 0.16675202548503876,
"eval_precision": 0.6895734597156398,
"eval_recall": 0.8584070796460177,
"eval_runtime": 0.913,
"eval_samples_per_second": 204.819,
"eval_steps_per_second": 3.286,
"step": 1696
},
{
"epoch": 17.0,
"grad_norm": 2.5177829265594482,
"learning_rate": 7.5e-06,
"loss": 0.1764,
"step": 1802
},
{
"epoch": 17.0,
"eval_accuracy": 0.9477107830873155,
"eval_f1": 0.7602649006622517,
"eval_loss": 0.16347847878932953,
"eval_precision": 0.6899038461538461,
"eval_recall": 0.8466076696165191,
"eval_runtime": 0.9243,
"eval_samples_per_second": 202.326,
"eval_steps_per_second": 3.246,
"step": 1802
},
{
"epoch": 18.0,
"grad_norm": 0.8699701428413391,
"learning_rate": 5e-06,
"loss": 0.1729,
"step": 1908
},
{
"epoch": 18.0,
"eval_accuracy": 0.9472104078058544,
"eval_f1": 0.7611548556430445,
"eval_loss": 0.16536261141300201,
"eval_precision": 0.6855791962174941,
"eval_recall": 0.855457227138643,
"eval_runtime": 0.9101,
"eval_samples_per_second": 205.482,
"eval_steps_per_second": 3.297,
"step": 1908
},
{
"epoch": 19.0,
"grad_norm": 2.786510705947876,
"learning_rate": 2.5e-06,
"loss": 0.1726,
"step": 2014
},
{
"epoch": 19.0,
"eval_accuracy": 0.9477107830873155,
"eval_f1": 0.7621550591327202,
"eval_loss": 0.16280074417591095,
"eval_precision": 0.6872037914691943,
"eval_recall": 0.855457227138643,
"eval_runtime": 0.9163,
"eval_samples_per_second": 204.074,
"eval_steps_per_second": 3.274,
"step": 2014
},
{
"epoch": 20.0,
"grad_norm": 1.0649765729904175,
"learning_rate": 0.0,
"loss": 0.1684,
"step": 2120
},
{
"epoch": 20.0,
"eval_accuracy": 0.9472104078058544,
"eval_f1": 0.7595269382391592,
"eval_loss": 0.16256052255630493,
"eval_precision": 0.6848341232227488,
"eval_recall": 0.8525073746312685,
"eval_runtime": 0.9134,
"eval_samples_per_second": 204.735,
"eval_steps_per_second": 3.285,
"step": 2120
},
{
"epoch": 20.0,
"step": 2120,
"total_flos": 904262544144960.0,
"train_loss": 0.3163445589677343,
"train_runtime": 244.3626,
"train_samples_per_second": 138.074,
"train_steps_per_second": 8.676
}
],
"logging_steps": 500,
"max_steps": 2120,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 904262544144960.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}