pep_summarization / trainer_state.json
jpodivin's picture
End of training
4747369 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 345,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_gen_len": 95.44927536231884,
"eval_loss": 0.09570997208356857,
"eval_rouge1": 72.6601,
"eval_rouge2": 71.6824,
"eval_rougeL": 72.6858,
"eval_rougeLsum": 72.4668,
"eval_runtime": 12.0064,
"eval_samples_per_second": 5.747,
"eval_steps_per_second": 1.499,
"step": 69
},
{
"epoch": 2.0,
"eval_gen_len": 92.01449275362319,
"eval_loss": 0.13447459042072296,
"eval_rouge1": 75.0063,
"eval_rouge2": 74.0782,
"eval_rougeL": 75.0597,
"eval_rougeLsum": 74.8943,
"eval_runtime": 11.945,
"eval_samples_per_second": 5.776,
"eval_steps_per_second": 1.507,
"step": 138
},
{
"epoch": 3.0,
"eval_gen_len": 85.46376811594203,
"eval_loss": 0.14119356870651245,
"eval_rouge1": 75.3012,
"eval_rouge2": 74.5492,
"eval_rougeL": 75.4246,
"eval_rougeLsum": 75.324,
"eval_runtime": 10.9494,
"eval_samples_per_second": 6.302,
"eval_steps_per_second": 1.644,
"step": 207
},
{
"epoch": 4.0,
"eval_gen_len": 85.04347826086956,
"eval_loss": 0.10889122635126114,
"eval_rouge1": 74.8426,
"eval_rouge2": 74.0317,
"eval_rougeL": 74.8939,
"eval_rougeLsum": 74.8128,
"eval_runtime": 11.2109,
"eval_samples_per_second": 6.155,
"eval_steps_per_second": 1.606,
"step": 276
},
{
"epoch": 5.0,
"eval_gen_len": 85.31884057971014,
"eval_loss": 0.12416736036539078,
"eval_rouge1": 75.3806,
"eval_rouge2": 74.6735,
"eval_rougeL": 75.5866,
"eval_rougeLsum": 75.5446,
"eval_runtime": 11.151,
"eval_samples_per_second": 6.188,
"eval_steps_per_second": 1.614,
"step": 345
},
{
"epoch": 5.0,
"step": 345,
"total_flos": 2990604350914560.0,
"train_loss": 0.036720043679942256,
"train_runtime": 132.663,
"train_samples_per_second": 10.402,
"train_steps_per_second": 2.601
}
],
"logging_steps": 500,
"max_steps": 345,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 2990604350914560.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}