|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 2624, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 10.017182130584192, |
|
"eval_loss": 0.6921027898788452, |
|
"eval_rouge1": 34.9112, |
|
"eval_rouge2": 26.7503, |
|
"eval_rougeL": 31.4124, |
|
"eval_rougeLsum": 31.7295, |
|
"eval_runtime": 139.485, |
|
"eval_samples_per_second": 2.086, |
|
"eval_steps_per_second": 0.265, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0003125, |
|
"loss": 6.8746, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 10.845360824742269, |
|
"eval_loss": 0.6025161147117615, |
|
"eval_rouge1": 33.9134, |
|
"eval_rouge2": 25.3236, |
|
"eval_rougeL": 30.1968, |
|
"eval_rougeLsum": 30.472, |
|
"eval_runtime": 151.0112, |
|
"eval_samples_per_second": 1.927, |
|
"eval_steps_per_second": 0.245, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 11.872852233676976, |
|
"eval_loss": 0.5686607956886292, |
|
"eval_rouge1": 31.6178, |
|
"eval_rouge2": 22.9463, |
|
"eval_rougeL": 27.8758, |
|
"eval_rougeLsum": 28.3572, |
|
"eval_runtime": 165.9096, |
|
"eval_samples_per_second": 1.754, |
|
"eval_steps_per_second": 0.223, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.0004853133981334999, |
|
"loss": 0.6462, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 12.130584192439862, |
|
"eval_loss": 0.5354723334312439, |
|
"eval_rouge1": 30.8157, |
|
"eval_rouge2": 22.1783, |
|
"eval_rougeL": 27.1641, |
|
"eval_rougeLsum": 27.569, |
|
"eval_runtime": 160.3562, |
|
"eval_samples_per_second": 1.815, |
|
"eval_steps_per_second": 0.231, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.00033927030930209996, |
|
"loss": 0.5618, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 12.50171821305842, |
|
"eval_loss": 0.5159749388694763, |
|
"eval_rouge1": 29.9183, |
|
"eval_rouge2": 21.0842, |
|
"eval_rougeL": 26.1671, |
|
"eval_rougeLsum": 26.5965, |
|
"eval_runtime": 158.4069, |
|
"eval_samples_per_second": 1.837, |
|
"eval_steps_per_second": 0.234, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 12.508591065292096, |
|
"eval_loss": 0.5024679899215698, |
|
"eval_rouge1": 29.7823, |
|
"eval_rouge2": 21.1443, |
|
"eval_rougeL": 26.0286, |
|
"eval_rougeLsum": 26.5215, |
|
"eval_runtime": 154.9067, |
|
"eval_samples_per_second": 1.879, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.0001310131517407316, |
|
"loss": 0.498, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 12.896907216494846, |
|
"eval_loss": 0.4978101849555969, |
|
"eval_rouge1": 29.1043, |
|
"eval_rouge2": 20.2391, |
|
"eval_rougeL": 25.3347, |
|
"eval_rougeLsum": 25.804, |
|
"eval_runtime": 152.5447, |
|
"eval_samples_per_second": 1.908, |
|
"eval_steps_per_second": 0.243, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 5.680048251060194e-06, |
|
"loss": 0.4551, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 12.738831615120274, |
|
"eval_loss": 0.497775673866272, |
|
"eval_rouge1": 29.5362, |
|
"eval_rouge2": 20.6621, |
|
"eval_rougeL": 25.7689, |
|
"eval_rougeLsum": 26.2351, |
|
"eval_runtime": 159.8028, |
|
"eval_samples_per_second": 1.821, |
|
"eval_steps_per_second": 0.232, |
|
"step": 2624 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2624, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"total_flos": 1.966513744458547e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|