|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 12.0, |
|
"eval_steps": 500, |
|
"global_step": 3936, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 15.577319587628866, |
|
"eval_loss": 8.096635818481445, |
|
"eval_rouge1": 16.5418, |
|
"eval_rouge2": 10.3523, |
|
"eval_rougeL": 13.972, |
|
"eval_rougeLsum": 14.1918, |
|
"eval_runtime": 157.1024, |
|
"eval_samples_per_second": 1.852, |
|
"eval_steps_per_second": 0.236, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.8394004282655248e-05, |
|
"loss": 18.3143, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 8.896907216494846, |
|
"eval_loss": 0.9259825944900513, |
|
"eval_rouge1": 31.5806, |
|
"eval_rouge2": 27.0287, |
|
"eval_rougeL": 29.6916, |
|
"eval_rougeLsum": 30.0327, |
|
"eval_runtime": 155.4292, |
|
"eval_samples_per_second": 1.872, |
|
"eval_steps_per_second": 0.238, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 13.807560137457045, |
|
"eval_loss": 0.7708158493041992, |
|
"eval_rouge1": 22.6847, |
|
"eval_rouge2": 15.805, |
|
"eval_rougeL": 19.6336, |
|
"eval_rougeLsum": 19.8945, |
|
"eval_runtime": 155.3127, |
|
"eval_samples_per_second": 1.874, |
|
"eval_steps_per_second": 0.238, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.5717344753747325e-05, |
|
"loss": 1.0739, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 9.618556701030927, |
|
"eval_loss": 0.7307576537132263, |
|
"eval_rouge1": 35.1675, |
|
"eval_rouge2": 27.3998, |
|
"eval_rougeL": 31.8527, |
|
"eval_rougeLsum": 32.0356, |
|
"eval_runtime": 133.4505, |
|
"eval_samples_per_second": 2.181, |
|
"eval_steps_per_second": 0.277, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 1.3040685224839403e-05, |
|
"loss": 0.8085, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 10.116838487972508, |
|
"eval_loss": 0.708371102809906, |
|
"eval_rouge1": 34.4346, |
|
"eval_rouge2": 26.202, |
|
"eval_rougeL": 30.8999, |
|
"eval_rougeLsum": 31.212, |
|
"eval_runtime": 139.0571, |
|
"eval_samples_per_second": 2.093, |
|
"eval_steps_per_second": 0.266, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 10.268041237113403, |
|
"eval_loss": 0.6923775672912598, |
|
"eval_rouge1": 34.3345, |
|
"eval_rouge2": 26.0144, |
|
"eval_rougeL": 30.692, |
|
"eval_rougeLsum": 31.0384, |
|
"eval_runtime": 141.1422, |
|
"eval_samples_per_second": 2.062, |
|
"eval_steps_per_second": 0.262, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.036402569593148e-05, |
|
"loss": 0.7597, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 10.31958762886598, |
|
"eval_loss": 0.6812536716461182, |
|
"eval_rouge1": 34.3854, |
|
"eval_rouge2": 26.0495, |
|
"eval_rougeL": 30.8335, |
|
"eval_rougeLsum": 31.1696, |
|
"eval_runtime": 143.6068, |
|
"eval_samples_per_second": 2.026, |
|
"eval_steps_per_second": 0.258, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 7.687366167023556e-06, |
|
"loss": 0.7442, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 10.360824742268042, |
|
"eval_loss": 0.6728662252426147, |
|
"eval_rouge1": 34.3758, |
|
"eval_rouge2": 26.0079, |
|
"eval_rougeL": 30.7863, |
|
"eval_rougeLsum": 31.1239, |
|
"eval_runtime": 142.7676, |
|
"eval_samples_per_second": 2.038, |
|
"eval_steps_per_second": 0.259, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 10.49828178694158, |
|
"eval_loss": 0.6669635772705078, |
|
"eval_rouge1": 34.2115, |
|
"eval_rouge2": 25.7443, |
|
"eval_rougeL": 30.5369, |
|
"eval_rougeLsum": 30.9282, |
|
"eval_runtime": 145.2485, |
|
"eval_samples_per_second": 2.003, |
|
"eval_steps_per_second": 0.255, |
|
"step": 2952 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 5.010706638115633e-06, |
|
"loss": 0.7252, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 10.529209621993127, |
|
"eval_loss": 0.6624875664710999, |
|
"eval_rouge1": 34.2518, |
|
"eval_rouge2": 25.7147, |
|
"eval_rougeL": 30.5433, |
|
"eval_rougeLsum": 30.9116, |
|
"eval_runtime": 146.9477, |
|
"eval_samples_per_second": 1.98, |
|
"eval_steps_per_second": 0.252, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 2.334047109207709e-06, |
|
"loss": 0.7168, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_gen_len": 10.618556701030927, |
|
"eval_loss": 0.6601226925849915, |
|
"eval_rouge1": 34.0539, |
|
"eval_rouge2": 25.5073, |
|
"eval_rougeL": 30.329, |
|
"eval_rougeLsum": 30.6828, |
|
"eval_runtime": 146.8209, |
|
"eval_samples_per_second": 1.982, |
|
"eval_steps_per_second": 0.252, |
|
"step": 3608 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 10.532646048109966, |
|
"eval_loss": 0.6593531370162964, |
|
"eval_rouge1": 34.2696, |
|
"eval_rouge2": 25.7973, |
|
"eval_rougeL": 30.5609, |
|
"eval_rougeLsum": 30.9651, |
|
"eval_runtime": 145.5529, |
|
"eval_samples_per_second": 1.999, |
|
"eval_steps_per_second": 0.254, |
|
"step": 3936 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3936, |
|
"num_train_epochs": 12, |
|
"save_steps": 500, |
|
"total_flos": 2.949770616687821e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|