PiccoviralesGPT / trainer_state.json
avuhong's picture
Upload 9 files
a341115
raw
history blame
5.35 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.998864926220204,
"global_step": 3520,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.7795322956613279,
"eval_loss": 1.5038145780563354,
"eval_runtime": 5.9639,
"eval_samples_per_second": 15.258,
"eval_steps_per_second": 7.713,
"step": 220
},
{
"epoch": 2.0,
"eval_accuracy": 0.7964938287518932,
"eval_loss": 1.3765002489089966,
"eval_runtime": 5.973,
"eval_samples_per_second": 15.235,
"eval_steps_per_second": 7.701,
"step": 440
},
{
"epoch": 2.27,
"learning_rate": 8.579545454545455e-06,
"loss": 1.5308,
"step": 500
},
{
"epoch": 3.0,
"eval_accuracy": 0.8075258075258075,
"eval_loss": 1.2920387983322144,
"eval_runtime": 5.9722,
"eval_samples_per_second": 15.237,
"eval_steps_per_second": 7.702,
"step": 660
},
{
"epoch": 4.0,
"eval_accuracy": 0.8155607833027188,
"eval_loss": 1.230821967124939,
"eval_runtime": 5.6615,
"eval_samples_per_second": 16.073,
"eval_steps_per_second": 8.125,
"step": 880
},
{
"epoch": 4.54,
"learning_rate": 7.161931818181819e-06,
"loss": 1.2695,
"step": 1000
},
{
"epoch": 5.0,
"eval_accuracy": 0.8225860161344032,
"eval_loss": 1.1788371801376343,
"eval_runtime": 5.9852,
"eval_samples_per_second": 15.204,
"eval_steps_per_second": 7.686,
"step": 1100
},
{
"epoch": 6.0,
"eval_accuracy": 0.8278817956237311,
"eval_loss": 1.136326789855957,
"eval_runtime": 5.9723,
"eval_samples_per_second": 15.237,
"eval_steps_per_second": 7.702,
"step": 1320
},
{
"epoch": 6.82,
"learning_rate": 5.741477272727272e-06,
"loss": 1.1353,
"step": 1500
},
{
"epoch": 7.0,
"eval_accuracy": 0.8323826710923485,
"eval_loss": 1.102668285369873,
"eval_runtime": 5.9851,
"eval_samples_per_second": 15.204,
"eval_steps_per_second": 7.686,
"step": 1540
},
{
"epoch": 8.0,
"eval_accuracy": 0.8373239663562244,
"eval_loss": 1.072572112083435,
"eval_runtime": 5.9939,
"eval_samples_per_second": 15.182,
"eval_steps_per_second": 7.674,
"step": 1760
},
{
"epoch": 9.0,
"eval_accuracy": 0.8404928404928405,
"eval_loss": 1.0481319427490234,
"eval_runtime": 5.9927,
"eval_samples_per_second": 15.185,
"eval_steps_per_second": 7.676,
"step": 1980
},
{
"epoch": 9.09,
"learning_rate": 4.321022727272728e-06,
"loss": 1.0713,
"step": 2000
},
{
"epoch": 10.0,
"eval_accuracy": 0.8432642626191014,
"eval_loss": 1.0299291610717773,
"eval_runtime": 5.6745,
"eval_samples_per_second": 16.037,
"eval_steps_per_second": 8.106,
"step": 2200
},
{
"epoch": 11.0,
"eval_accuracy": 0.8455415552189746,
"eval_loss": 1.0174473524093628,
"eval_runtime": 5.9763,
"eval_samples_per_second": 15.227,
"eval_steps_per_second": 7.697,
"step": 2420
},
{
"epoch": 11.36,
"learning_rate": 2.900568181818182e-06,
"loss": 1.0233,
"step": 2500
},
{
"epoch": 12.0,
"eval_accuracy": 0.8477006864103638,
"eval_loss": 1.0027512311935425,
"eval_runtime": 5.9881,
"eval_samples_per_second": 15.197,
"eval_steps_per_second": 7.682,
"step": 2640
},
{
"epoch": 13.0,
"eval_accuracy": 0.8488178488178488,
"eval_loss": 0.9938735961914062,
"eval_runtime": 5.9759,
"eval_samples_per_second": 15.228,
"eval_steps_per_second": 7.698,
"step": 2860
},
{
"epoch": 13.64,
"learning_rate": 1.4829545454545454e-06,
"loss": 0.9811,
"step": 3000
},
{
"epoch": 14.0,
"eval_accuracy": 0.8496772045159142,
"eval_loss": 0.9889363646507263,
"eval_runtime": 5.9819,
"eval_samples_per_second": 15.213,
"eval_steps_per_second": 7.69,
"step": 3080
},
{
"epoch": 15.0,
"eval_accuracy": 0.8499779790102371,
"eval_loss": 0.9854440093040466,
"eval_runtime": 5.9887,
"eval_samples_per_second": 15.195,
"eval_steps_per_second": 7.681,
"step": 3300
},
{
"epoch": 15.91,
"learning_rate": 6.250000000000001e-08,
"loss": 0.9696,
"step": 3500
},
{
"epoch": 16.0,
"eval_accuracy": 0.8501928179347534,
"eval_loss": 0.9834597110748291,
"eval_runtime": 5.978,
"eval_samples_per_second": 15.223,
"eval_steps_per_second": 7.695,
"step": 3520
},
{
"epoch": 16.0,
"step": 3520,
"total_flos": 1.2269276173959168e+17,
"train_loss": 1.138753395730799,
"train_runtime": 5180.444,
"train_samples_per_second": 5.442,
"train_steps_per_second": 0.679
}
],
"max_steps": 3520,
"num_train_epochs": 16,
"total_flos": 1.2269276173959168e+17,
"trial_name": null,
"trial_params": null
}