|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 345, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 95.44927536231884, |
|
"eval_loss": 0.09570997208356857, |
|
"eval_rouge1": 72.6601, |
|
"eval_rouge2": 71.6824, |
|
"eval_rougeL": 72.6858, |
|
"eval_rougeLsum": 72.4668, |
|
"eval_runtime": 12.0064, |
|
"eval_samples_per_second": 5.747, |
|
"eval_steps_per_second": 1.499, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 92.01449275362319, |
|
"eval_loss": 0.13447459042072296, |
|
"eval_rouge1": 75.0063, |
|
"eval_rouge2": 74.0782, |
|
"eval_rougeL": 75.0597, |
|
"eval_rougeLsum": 74.8943, |
|
"eval_runtime": 11.945, |
|
"eval_samples_per_second": 5.776, |
|
"eval_steps_per_second": 1.507, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 85.46376811594203, |
|
"eval_loss": 0.14119356870651245, |
|
"eval_rouge1": 75.3012, |
|
"eval_rouge2": 74.5492, |
|
"eval_rougeL": 75.4246, |
|
"eval_rougeLsum": 75.324, |
|
"eval_runtime": 10.9494, |
|
"eval_samples_per_second": 6.302, |
|
"eval_steps_per_second": 1.644, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 85.04347826086956, |
|
"eval_loss": 0.10889122635126114, |
|
"eval_rouge1": 74.8426, |
|
"eval_rouge2": 74.0317, |
|
"eval_rougeL": 74.8939, |
|
"eval_rougeLsum": 74.8128, |
|
"eval_runtime": 11.2109, |
|
"eval_samples_per_second": 6.155, |
|
"eval_steps_per_second": 1.606, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 85.31884057971014, |
|
"eval_loss": 0.12416736036539078, |
|
"eval_rouge1": 75.3806, |
|
"eval_rouge2": 74.6735, |
|
"eval_rougeL": 75.5866, |
|
"eval_rougeLsum": 75.5446, |
|
"eval_runtime": 11.151, |
|
"eval_samples_per_second": 6.188, |
|
"eval_steps_per_second": 1.614, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 345, |
|
"total_flos": 2990604350914560.0, |
|
"train_loss": 0.036720043679942256, |
|
"train_runtime": 132.663, |
|
"train_samples_per_second": 10.402, |
|
"train_steps_per_second": 2.601 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 345, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 2990604350914560.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|