|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.998864926220204, |
|
"global_step": 3520, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7795322956613279, |
|
"eval_loss": 1.5038145780563354, |
|
"eval_runtime": 5.9639, |
|
"eval_samples_per_second": 15.258, |
|
"eval_steps_per_second": 7.713, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7964938287518932, |
|
"eval_loss": 1.3765002489089966, |
|
"eval_runtime": 5.973, |
|
"eval_samples_per_second": 15.235, |
|
"eval_steps_per_second": 7.701, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.579545454545455e-06, |
|
"loss": 1.5308, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8075258075258075, |
|
"eval_loss": 1.2920387983322144, |
|
"eval_runtime": 5.9722, |
|
"eval_samples_per_second": 15.237, |
|
"eval_steps_per_second": 7.702, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8155607833027188, |
|
"eval_loss": 1.230821967124939, |
|
"eval_runtime": 5.6615, |
|
"eval_samples_per_second": 16.073, |
|
"eval_steps_per_second": 8.125, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 7.161931818181819e-06, |
|
"loss": 1.2695, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8225860161344032, |
|
"eval_loss": 1.1788371801376343, |
|
"eval_runtime": 5.9852, |
|
"eval_samples_per_second": 15.204, |
|
"eval_steps_per_second": 7.686, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8278817956237311, |
|
"eval_loss": 1.136326789855957, |
|
"eval_runtime": 5.9723, |
|
"eval_samples_per_second": 15.237, |
|
"eval_steps_per_second": 7.702, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 5.741477272727272e-06, |
|
"loss": 1.1353, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8323826710923485, |
|
"eval_loss": 1.102668285369873, |
|
"eval_runtime": 5.9851, |
|
"eval_samples_per_second": 15.204, |
|
"eval_steps_per_second": 7.686, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8373239663562244, |
|
"eval_loss": 1.072572112083435, |
|
"eval_runtime": 5.9939, |
|
"eval_samples_per_second": 15.182, |
|
"eval_steps_per_second": 7.674, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8404928404928405, |
|
"eval_loss": 1.0481319427490234, |
|
"eval_runtime": 5.9927, |
|
"eval_samples_per_second": 15.185, |
|
"eval_steps_per_second": 7.676, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 4.321022727272728e-06, |
|
"loss": 1.0713, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8432642626191014, |
|
"eval_loss": 1.0299291610717773, |
|
"eval_runtime": 5.6745, |
|
"eval_samples_per_second": 16.037, |
|
"eval_steps_per_second": 8.106, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8455415552189746, |
|
"eval_loss": 1.0174473524093628, |
|
"eval_runtime": 5.9763, |
|
"eval_samples_per_second": 15.227, |
|
"eval_steps_per_second": 7.697, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 2.900568181818182e-06, |
|
"loss": 1.0233, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8477006864103638, |
|
"eval_loss": 1.0027512311935425, |
|
"eval_runtime": 5.9881, |
|
"eval_samples_per_second": 15.197, |
|
"eval_steps_per_second": 7.682, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8488178488178488, |
|
"eval_loss": 0.9938735961914062, |
|
"eval_runtime": 5.9759, |
|
"eval_samples_per_second": 15.228, |
|
"eval_steps_per_second": 7.698, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 1.4829545454545454e-06, |
|
"loss": 0.9811, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8496772045159142, |
|
"eval_loss": 0.9889363646507263, |
|
"eval_runtime": 5.9819, |
|
"eval_samples_per_second": 15.213, |
|
"eval_steps_per_second": 7.69, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8499779790102371, |
|
"eval_loss": 0.9854440093040466, |
|
"eval_runtime": 5.9887, |
|
"eval_samples_per_second": 15.195, |
|
"eval_steps_per_second": 7.681, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 6.250000000000001e-08, |
|
"loss": 0.9696, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8501928179347534, |
|
"eval_loss": 0.9834597110748291, |
|
"eval_runtime": 5.978, |
|
"eval_samples_per_second": 15.223, |
|
"eval_steps_per_second": 7.695, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 3520, |
|
"total_flos": 1.2269276173959168e+17, |
|
"train_loss": 1.138753395730799, |
|
"train_runtime": 5180.444, |
|
"train_samples_per_second": 5.442, |
|
"train_steps_per_second": 0.679 |
|
} |
|
], |
|
"max_steps": 3520, |
|
"num_train_epochs": 16, |
|
"total_flos": 1.2269276173959168e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|