aya-101-lora_rank4_lr5e-5-sw / trainer_state.json
faridlazuarda's picture
Initial commit of latest checkpoint files
d442e46 verified
{
"best_metric": 0.5653966665267944,
"best_model_checkpoint": "/mnt/beegfs/farid/mlora/outputs/xnli/aya-101/sw/rank4_lr5e-5/checkpoint-5500",
"epoch": 0.24445893089960888,
"eval_steps": 500,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.020371577574967405,
"grad_norm": 2.0201494693756104,
"learning_rate": 4.166666666666667e-05,
"loss": 1.1486,
"step": 500
},
{
"epoch": 0.020371577574967405,
"eval_accuracy": 0.37309236947791163,
"eval_f1": 0.3322283925001309,
"eval_loss": 1.0934925079345703,
"eval_runtime": 413.7386,
"eval_samples_per_second": 6.018,
"eval_steps_per_second": 0.377,
"step": 500
},
{
"epoch": 0.04074315514993481,
"grad_norm": 5.576170444488525,
"learning_rate": 4.62962962962963e-05,
"loss": 1.0336,
"step": 1000
},
{
"epoch": 0.04074315514993481,
"eval_accuracy": 0.6514056224899598,
"eval_f1": 0.6496455979119086,
"eval_loss": 0.8499080538749695,
"eval_runtime": 411.5812,
"eval_samples_per_second": 6.05,
"eval_steps_per_second": 0.379,
"step": 1000
},
{
"epoch": 0.06111473272490222,
"grad_norm": 7.646302700042725,
"learning_rate": 4.166666666666667e-05,
"loss": 0.8636,
"step": 1500
},
{
"epoch": 0.06111473272490222,
"eval_accuracy": 0.7269076305220884,
"eval_f1": 0.7287323684689043,
"eval_loss": 0.668684184551239,
"eval_runtime": 411.6805,
"eval_samples_per_second": 6.048,
"eval_steps_per_second": 0.379,
"step": 1500
},
{
"epoch": 0.08148631029986962,
"grad_norm": 11.417831420898438,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.7968,
"step": 2000
},
{
"epoch": 0.08148631029986962,
"eval_accuracy": 0.734136546184739,
"eval_f1": 0.7373689502738993,
"eval_loss": 0.6560496091842651,
"eval_runtime": 411.618,
"eval_samples_per_second": 6.049,
"eval_steps_per_second": 0.379,
"step": 2000
},
{
"epoch": 0.10185788787483703,
"grad_norm": 5.362828254699707,
"learning_rate": 3.240740740740741e-05,
"loss": 0.7744,
"step": 2500
},
{
"epoch": 0.10185788787483703,
"eval_accuracy": 0.7546184738955823,
"eval_f1": 0.7561677733171569,
"eval_loss": 0.6078478693962097,
"eval_runtime": 411.4482,
"eval_samples_per_second": 6.052,
"eval_steps_per_second": 0.379,
"step": 2500
},
{
"epoch": 0.12222946544980444,
"grad_norm": 5.366226673126221,
"learning_rate": 2.777777777777778e-05,
"loss": 0.7559,
"step": 3000
},
{
"epoch": 0.12222946544980444,
"eval_accuracy": 0.7570281124497992,
"eval_f1": 0.7592593531306621,
"eval_loss": 0.6063372492790222,
"eval_runtime": 411.1805,
"eval_samples_per_second": 6.056,
"eval_steps_per_second": 0.379,
"step": 3000
},
{
"epoch": 0.14260104302477183,
"grad_norm": 7.356509685516357,
"learning_rate": 2.314814814814815e-05,
"loss": 0.7419,
"step": 3500
},
{
"epoch": 0.14260104302477183,
"eval_accuracy": 0.7734939759036145,
"eval_f1": 0.7749800378134201,
"eval_loss": 0.5820600390434265,
"eval_runtime": 411.2963,
"eval_samples_per_second": 6.054,
"eval_steps_per_second": 0.379,
"step": 3500
},
{
"epoch": 0.16297262059973924,
"grad_norm": 6.393350124359131,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.7382,
"step": 4000
},
{
"epoch": 0.16297262059973924,
"eval_accuracy": 0.7746987951807229,
"eval_f1": 0.7750757863908069,
"eval_loss": 0.5675287842750549,
"eval_runtime": 411.5669,
"eval_samples_per_second": 6.05,
"eval_steps_per_second": 0.379,
"step": 4000
},
{
"epoch": 0.18334419817470665,
"grad_norm": 6.975522994995117,
"learning_rate": 1.388888888888889e-05,
"loss": 0.7207,
"step": 4500
},
{
"epoch": 0.18334419817470665,
"eval_accuracy": 0.7738955823293173,
"eval_f1": 0.7758231525059284,
"eval_loss": 0.5766967535018921,
"eval_runtime": 411.2036,
"eval_samples_per_second": 6.055,
"eval_steps_per_second": 0.379,
"step": 4500
},
{
"epoch": 0.20371577574967406,
"grad_norm": 6.244735240936279,
"learning_rate": 9.259259259259259e-06,
"loss": 0.7221,
"step": 5000
},
{
"epoch": 0.20371577574967406,
"eval_accuracy": 0.7819277108433735,
"eval_f1": 0.7828326523350859,
"eval_loss": 0.5711483359336853,
"eval_runtime": 432.5918,
"eval_samples_per_second": 5.756,
"eval_steps_per_second": 0.361,
"step": 5000
},
{
"epoch": 0.22408735332464147,
"grad_norm": 4.810456275939941,
"learning_rate": 4.6296296296296296e-06,
"loss": 0.7079,
"step": 5500
},
{
"epoch": 0.22408735332464147,
"eval_accuracy": 0.7823293172690763,
"eval_f1": 0.783225399873768,
"eval_loss": 0.5653966665267944,
"eval_runtime": 411.5268,
"eval_samples_per_second": 6.051,
"eval_steps_per_second": 0.379,
"step": 5500
},
{
"epoch": 0.24445893089960888,
"grad_norm": 7.606008052825928,
"learning_rate": 0.0,
"loss": 0.7073,
"step": 6000
},
{
"epoch": 0.24445893089960888,
"eval_accuracy": 0.7835341365461848,
"eval_f1": 0.7843248776628492,
"eval_loss": 0.5673787593841553,
"eval_runtime": 411.0953,
"eval_samples_per_second": 6.057,
"eval_steps_per_second": 0.379,
"step": 6000
}
],
"logging_steps": 500,
"max_steps": 6000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 8.03166870528e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}