|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.789564097058193, |
|
"eval_steps": 500, |
|
"global_step": 720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001, |
|
"loss": 8.8727, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 0.0, |
|
"eval_bp": 0.0002, |
|
"eval_counts_1": 2198, |
|
"eval_counts_2": 0, |
|
"eval_counts_3": 0, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0, |
|
"eval_gen_len": 2.0, |
|
"eval_loss": 6.380987644195557, |
|
"eval_precisions_1": 99.7278, |
|
"eval_precisions_2": 0.0, |
|
"eval_precisions_3": 0.0, |
|
"eval_precisions_4": 0.0, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 575.1236, |
|
"eval_samples_per_second": 3.832, |
|
"eval_steps_per_second": 1.916, |
|
"eval_sys_len": 2204, |
|
"eval_totals_1": 2204, |
|
"eval_totals_2": 0, |
|
"eval_totals_3": 0, |
|
"eval_totals_4": 0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0001, |
|
"loss": 6.0165, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_bleu": 0.0851, |
|
"eval_bp": 1.0, |
|
"eval_counts_1": 3587, |
|
"eval_counts_2": 137, |
|
"eval_counts_3": 0, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.073, |
|
"eval_gen_len": 15.0091, |
|
"eval_loss": 5.386419773101807, |
|
"eval_precisions_1": 16.3342, |
|
"eval_precisions_2": 0.6935, |
|
"eval_precisions_3": 0.0028, |
|
"eval_precisions_4": 0.0016, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0702, |
|
"eval_rouge2": 0.0079, |
|
"eval_rougeL": 0.07, |
|
"eval_rougeLsum": 0.07, |
|
"eval_runtime": 2466.4687, |
|
"eval_samples_per_second": 0.894, |
|
"eval_steps_per_second": 0.447, |
|
"eval_sys_len": 21960, |
|
"eval_totals_1": 21960, |
|
"eval_totals_2": 19756, |
|
"eval_totals_3": 17552, |
|
"eval_totals_4": 15348, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0001, |
|
"loss": 5.1537, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.13, |
|
"eval_bp": 0.6246, |
|
"eval_counts_1": 3601, |
|
"eval_counts_2": 145, |
|
"eval_counts_3": 1, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0926, |
|
"eval_gen_len": 9.5309, |
|
"eval_loss": 4.961660861968994, |
|
"eval_precisions_1": 24.9221, |
|
"eval_precisions_2": 1.1842, |
|
"eval_precisions_3": 0.01, |
|
"eval_precisions_4": 0.0064, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0882, |
|
"eval_rouge2": 0.0107, |
|
"eval_rougeL": 0.0877, |
|
"eval_rougeLsum": 0.0876, |
|
"eval_runtime": 2425.7875, |
|
"eval_samples_per_second": 0.909, |
|
"eval_steps_per_second": 0.454, |
|
"eval_sys_len": 14449, |
|
"eval_totals_1": 14449, |
|
"eval_totals_2": 12245, |
|
"eval_totals_3": 10041, |
|
"eval_totals_4": 7837, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.0001, |
|
"loss": 4.863, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_bleu": 0.1468, |
|
"eval_bp": 1.0, |
|
"eval_counts_1": 4590, |
|
"eval_counts_2": 229, |
|
"eval_counts_3": 19, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0836, |
|
"eval_gen_len": 29.4528, |
|
"eval_loss": 4.553112030029297, |
|
"eval_precisions_1": 11.0141, |
|
"eval_precisions_2": 0.5802, |
|
"eval_precisions_3": 0.051, |
|
"eval_precisions_4": 0.0014, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0811, |
|
"eval_rouge2": 0.0081, |
|
"eval_rougeL": 0.0768, |
|
"eval_rougeLsum": 0.0767, |
|
"eval_runtime": 4425.6343, |
|
"eval_samples_per_second": 0.498, |
|
"eval_steps_per_second": 0.249, |
|
"eval_sys_len": 41674, |
|
"eval_totals_1": 41674, |
|
"eval_totals_2": 39470, |
|
"eval_totals_3": 37266, |
|
"eval_totals_4": 35062, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.0001, |
|
"loss": 4.5201, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_bleu": 0.2845, |
|
"eval_bp": 0.7265, |
|
"eval_counts_1": 3643, |
|
"eval_counts_2": 169, |
|
"eval_counts_3": 19, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0907, |
|
"eval_gen_len": 12.5077, |
|
"eval_loss": 4.201998233795166, |
|
"eval_precisions_1": 22.6217, |
|
"eval_precisions_2": 1.2158, |
|
"eval_precisions_3": 0.1624, |
|
"eval_precisions_4": 0.0053, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0865, |
|
"eval_rouge2": 0.0115, |
|
"eval_rougeL": 0.0856, |
|
"eval_rougeLsum": 0.0855, |
|
"eval_runtime": 2637.7264, |
|
"eval_samples_per_second": 0.836, |
|
"eval_steps_per_second": 0.418, |
|
"eval_sys_len": 16104, |
|
"eval_totals_1": 16104, |
|
"eval_totals_2": 13900, |
|
"eval_totals_3": 11696, |
|
"eval_totals_4": 9492, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0001, |
|
"loss": 4.1347, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_bleu": 0.2878, |
|
"eval_bp": 0.7671, |
|
"eval_counts_1": 3670, |
|
"eval_counts_2": 167, |
|
"eval_counts_3": 20, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0917, |
|
"eval_gen_len": 13.1656, |
|
"eval_loss": 3.9352548122406006, |
|
"eval_precisions_1": 21.8504, |
|
"eval_precisions_2": 1.1445, |
|
"eval_precisions_3": 0.1614, |
|
"eval_precisions_4": 0.0049, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.087, |
|
"eval_rouge2": 0.0114, |
|
"eval_rougeL": 0.0859, |
|
"eval_rougeLsum": 0.0858, |
|
"eval_runtime": 3199.7795, |
|
"eval_samples_per_second": 0.689, |
|
"eval_steps_per_second": 0.344, |
|
"eval_sys_len": 16796, |
|
"eval_totals_1": 16796, |
|
"eval_totals_2": 14592, |
|
"eval_totals_3": 12388, |
|
"eval_totals_4": 10184, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.0001, |
|
"loss": 4.012, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_bleu": 0.4139, |
|
"eval_bp": 0.7546, |
|
"eval_counts_1": 3780, |
|
"eval_counts_2": 198, |
|
"eval_counts_3": 35, |
|
"eval_counts_4": 1, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0968, |
|
"eval_gen_len": 12.2931, |
|
"eval_loss": 3.759270191192627, |
|
"eval_precisions_1": 22.7958, |
|
"eval_precisions_2": 1.3771, |
|
"eval_precisions_3": 0.2875, |
|
"eval_precisions_4": 0.01, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0916, |
|
"eval_rouge2": 0.0128, |
|
"eval_rougeL": 0.0903, |
|
"eval_rougeLsum": 0.0902, |
|
"eval_runtime": 2449.2398, |
|
"eval_samples_per_second": 0.9, |
|
"eval_steps_per_second": 0.45, |
|
"eval_sys_len": 16582, |
|
"eval_totals_1": 16582, |
|
"eval_totals_2": 14378, |
|
"eval_totals_3": 12174, |
|
"eval_totals_4": 9970, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001, |
|
"loss": 3.7048, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.5493, |
|
"eval_bp": 0.7297, |
|
"eval_counts_1": 3668, |
|
"eval_counts_2": 205, |
|
"eval_counts_3": 36, |
|
"eval_counts_4": 3, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0923, |
|
"eval_gen_len": 11.7568, |
|
"eval_loss": 3.603360414505005, |
|
"eval_precisions_1": 22.7008, |
|
"eval_precisions_2": 1.4691, |
|
"eval_precisions_3": 0.3064, |
|
"eval_precisions_4": 0.0314, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0882, |
|
"eval_rouge2": 0.0134, |
|
"eval_rougeL": 0.0873, |
|
"eval_rougeLsum": 0.0872, |
|
"eval_runtime": 2487.6786, |
|
"eval_samples_per_second": 0.886, |
|
"eval_steps_per_second": 0.443, |
|
"eval_sys_len": 16158, |
|
"eval_totals_1": 16158, |
|
"eval_totals_2": 13954, |
|
"eval_totals_3": 11750, |
|
"eval_totals_4": 9546, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.0001, |
|
"loss": 3.6284, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_bleu": 1.8083, |
|
"eval_bp": 0.8048, |
|
"eval_counts_1": 4070, |
|
"eval_counts_2": 527, |
|
"eval_counts_3": 160, |
|
"eval_counts_4": 28, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.1152, |
|
"eval_gen_len": 9.7777, |
|
"eval_loss": 3.4566922187805176, |
|
"eval_precisions_1": 23.3118, |
|
"eval_precisions_2": 3.4546, |
|
"eval_precisions_3": 1.226, |
|
"eval_precisions_4": 0.2581, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1109, |
|
"eval_rouge2": 0.0281, |
|
"eval_rougeL": 0.1083, |
|
"eval_rougeLsum": 0.1082, |
|
"eval_runtime": 2473.1224, |
|
"eval_samples_per_second": 0.891, |
|
"eval_steps_per_second": 0.446, |
|
"eval_sys_len": 17459, |
|
"eval_totals_1": 17459, |
|
"eval_totals_2": 15255, |
|
"eval_totals_3": 13051, |
|
"eval_totals_4": 10847, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 0.0001, |
|
"loss": 3.4605, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"eval_bleu": 1.6972, |
|
"eval_bp": 0.8793, |
|
"eval_counts_1": 4325, |
|
"eval_counts_2": 512, |
|
"eval_counts_3": 128, |
|
"eval_counts_4": 27, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.1254, |
|
"eval_gen_len": 12.6729, |
|
"eval_loss": 3.339012622833252, |
|
"eval_precisions_1": 22.9699, |
|
"eval_precisions_2": 3.0797, |
|
"eval_precisions_3": 0.8876, |
|
"eval_precisions_4": 0.221, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1206, |
|
"eval_rouge2": 0.0288, |
|
"eval_rougeL": 0.1168, |
|
"eval_rougeLsum": 0.1167, |
|
"eval_runtime": 2821.508, |
|
"eval_samples_per_second": 0.781, |
|
"eval_steps_per_second": 0.391, |
|
"eval_sys_len": 18829, |
|
"eval_totals_1": 18829, |
|
"eval_totals_2": 16625, |
|
"eval_totals_3": 14421, |
|
"eval_totals_4": 12217, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0001, |
|
"loss": 3.2267, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_bleu": 2.5735, |
|
"eval_bp": 0.8779, |
|
"eval_counts_1": 4498, |
|
"eval_counts_2": 774, |
|
"eval_counts_3": 237, |
|
"eval_counts_4": 49, |
|
"eval_exact_match": 0.0005, |
|
"eval_f1": 0.1381, |
|
"eval_gen_len": 11.5009, |
|
"eval_loss": 3.199504852294922, |
|
"eval_precisions_1": 23.923, |
|
"eval_precisions_2": 4.6632, |
|
"eval_precisions_3": 1.6465, |
|
"eval_precisions_4": 0.402, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1348, |
|
"eval_rouge2": 0.0405, |
|
"eval_rougeL": 0.132, |
|
"eval_rougeLsum": 0.1319, |
|
"eval_runtime": 2727.7858, |
|
"eval_samples_per_second": 0.808, |
|
"eval_steps_per_second": 0.404, |
|
"eval_sys_len": 18802, |
|
"eval_totals_1": 18802, |
|
"eval_totals_2": 16598, |
|
"eval_totals_3": 14394, |
|
"eval_totals_4": 12190, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 0.0001, |
|
"loss": 3.1761, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"eval_bleu": 2.7554, |
|
"eval_bp": 0.7767, |
|
"eval_counts_1": 4578, |
|
"eval_counts_2": 866, |
|
"eval_counts_3": 260, |
|
"eval_counts_4": 50, |
|
"eval_exact_match": 0.0005, |
|
"eval_f1": 0.1492, |
|
"eval_gen_len": 10.5172, |
|
"eval_loss": 3.116502523422241, |
|
"eval_precisions_1": 26.9882, |
|
"eval_precisions_2": 5.8676, |
|
"eval_precisions_3": 2.0709, |
|
"eval_precisions_4": 0.483, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1454, |
|
"eval_rouge2": 0.0464, |
|
"eval_rougeL": 0.1426, |
|
"eval_rougeLsum": 0.1427, |
|
"eval_runtime": 2404.4354, |
|
"eval_samples_per_second": 0.917, |
|
"eval_steps_per_second": 0.458, |
|
"eval_sys_len": 16963, |
|
"eval_totals_1": 16963, |
|
"eval_totals_2": 14759, |
|
"eval_totals_3": 12555, |
|
"eval_totals_4": 10351, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 0.0001, |
|
"loss": 3.0323, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"eval_bleu": 3.2318, |
|
"eval_bp": 0.839, |
|
"eval_counts_1": 5019, |
|
"eval_counts_2": 1048, |
|
"eval_counts_3": 319, |
|
"eval_counts_4": 59, |
|
"eval_exact_match": 0.0009, |
|
"eval_f1": 0.1729, |
|
"eval_gen_len": 12.8294, |
|
"eval_loss": 3.0074305534362793, |
|
"eval_precisions_1": 27.7646, |
|
"eval_precisions_2": 6.6024, |
|
"eval_precisions_3": 2.3337, |
|
"eval_precisions_4": 0.5146, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1691, |
|
"eval_rouge2": 0.0557, |
|
"eval_rougeL": 0.1648, |
|
"eval_rougeLsum": 0.1647, |
|
"eval_runtime": 1416.83, |
|
"eval_samples_per_second": 1.556, |
|
"eval_steps_per_second": 0.778, |
|
"eval_sys_len": 18077, |
|
"eval_totals_1": 18077, |
|
"eval_totals_2": 15873, |
|
"eval_totals_3": 13669, |
|
"eval_totals_4": 11465, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.8223, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_bleu": 3.7161, |
|
"eval_bp": 0.783, |
|
"eval_counts_1": 5257, |
|
"eval_counts_2": 1120, |
|
"eval_counts_3": 341, |
|
"eval_counts_4": 85, |
|
"eval_exact_match": 0.0018, |
|
"eval_f1": 0.1929, |
|
"eval_gen_len": 12.6824, |
|
"eval_loss": 2.891127109527588, |
|
"eval_precisions_1": 30.7895, |
|
"eval_precisions_2": 7.5319, |
|
"eval_precisions_3": 2.6922, |
|
"eval_precisions_4": 0.8125, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.189, |
|
"eval_rouge2": 0.0635, |
|
"eval_rougeL": 0.1841, |
|
"eval_rougeLsum": 0.184, |
|
"eval_runtime": 2341.3383, |
|
"eval_samples_per_second": 0.941, |
|
"eval_steps_per_second": 0.471, |
|
"eval_sys_len": 17074, |
|
"eval_totals_1": 17074, |
|
"eval_totals_2": 14870, |
|
"eval_totals_3": 12666, |
|
"eval_totals_4": 10462, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"learning_rate": 0.0001, |
|
"loss": 2.7732, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_bleu": 4.3667, |
|
"eval_bp": 0.8229, |
|
"eval_counts_1": 5616, |
|
"eval_counts_2": 1271, |
|
"eval_counts_3": 407, |
|
"eval_counts_4": 113, |
|
"eval_exact_match": 0.0045, |
|
"eval_f1": 0.217, |
|
"eval_gen_len": 13.0944, |
|
"eval_loss": 2.8103041648864746, |
|
"eval_precisions_1": 31.5789, |
|
"eval_precisions_2": 8.1579, |
|
"eval_precisions_3": 3.0428, |
|
"eval_precisions_4": 1.0115, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2122, |
|
"eval_rouge2": 0.0731, |
|
"eval_rougeL": 0.2063, |
|
"eval_rougeLsum": 0.2061, |
|
"eval_runtime": 2877.1799, |
|
"eval_samples_per_second": 0.766, |
|
"eval_steps_per_second": 0.383, |
|
"eval_sys_len": 17784, |
|
"eval_totals_1": 17784, |
|
"eval_totals_2": 15580, |
|
"eval_totals_3": 13376, |
|
"eval_totals_4": 11172, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.58, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 5.357, |
|
"eval_bp": 0.8782, |
|
"eval_counts_1": 5959, |
|
"eval_counts_2": 1461, |
|
"eval_counts_3": 510, |
|
"eval_counts_4": 171, |
|
"eval_exact_match": 0.0064, |
|
"eval_f1": 0.2316, |
|
"eval_gen_len": 13.9174, |
|
"eval_loss": 2.718313694000244, |
|
"eval_precisions_1": 31.6833, |
|
"eval_precisions_2": 8.7991, |
|
"eval_precisions_3": 3.5417, |
|
"eval_precisions_4": 1.4021, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2286, |
|
"eval_rouge2": 0.0822, |
|
"eval_rougeL": 0.2214, |
|
"eval_rougeLsum": 0.2212, |
|
"eval_runtime": 2978.2825, |
|
"eval_samples_per_second": 0.74, |
|
"eval_steps_per_second": 0.37, |
|
"eval_sys_len": 18808, |
|
"eval_totals_1": 18808, |
|
"eval_totals_2": 16604, |
|
"eval_totals_3": 14400, |
|
"eval_totals_4": 12196, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.5368, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_bleu": 5.8686, |
|
"eval_bp": 0.7744, |
|
"eval_counts_1": 5935, |
|
"eval_counts_2": 1543, |
|
"eval_counts_3": 576, |
|
"eval_counts_4": 201, |
|
"eval_exact_match": 0.0059, |
|
"eval_f1": 0.2377, |
|
"eval_gen_len": 12.3185, |
|
"eval_loss": 2.6629750728607178, |
|
"eval_precisions_1": 35.0706, |
|
"eval_precisions_2": 10.483, |
|
"eval_precisions_3": 4.6025, |
|
"eval_precisions_4": 1.9494, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2365, |
|
"eval_rouge2": 0.089, |
|
"eval_rougeL": 0.2309, |
|
"eval_rougeLsum": 0.2307, |
|
"eval_runtime": 2677.1671, |
|
"eval_samples_per_second": 0.823, |
|
"eval_steps_per_second": 0.412, |
|
"eval_sys_len": 16923, |
|
"eval_totals_1": 16923, |
|
"eval_totals_2": 14719, |
|
"eval_totals_3": 12515, |
|
"eval_totals_4": 10311, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 17.98, |
|
"learning_rate": 0.0001, |
|
"loss": 2.4325, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 17.98, |
|
"eval_bleu": 6.8664, |
|
"eval_bp": 0.8277, |
|
"eval_counts_1": 6305, |
|
"eval_counts_2": 1756, |
|
"eval_counts_3": 685, |
|
"eval_counts_4": 265, |
|
"eval_exact_match": 0.0059, |
|
"eval_f1": 0.2537, |
|
"eval_gen_len": 13.1688, |
|
"eval_loss": 2.579846143722534, |
|
"eval_precisions_1": 35.2826, |
|
"eval_precisions_2": 11.209, |
|
"eval_precisions_3": 5.0884, |
|
"eval_precisions_4": 2.3539, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2518, |
|
"eval_rouge2": 0.0982, |
|
"eval_rougeL": 0.2452, |
|
"eval_rougeLsum": 0.2452, |
|
"eval_runtime": 2086.3742, |
|
"eval_samples_per_second": 1.056, |
|
"eval_steps_per_second": 0.528, |
|
"eval_sys_len": 17870, |
|
"eval_totals_1": 17870, |
|
"eval_totals_2": 15666, |
|
"eval_totals_3": 13462, |
|
"eval_totals_4": 11258, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.2632, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_bleu": 7.5129, |
|
"eval_bp": 0.823, |
|
"eval_counts_1": 6577, |
|
"eval_counts_2": 1888, |
|
"eval_counts_3": 762, |
|
"eval_counts_4": 304, |
|
"eval_exact_match": 0.0086, |
|
"eval_f1": 0.2702, |
|
"eval_gen_len": 13.2373, |
|
"eval_loss": 2.515482187271118, |
|
"eval_precisions_1": 36.9806, |
|
"eval_precisions_2": 12.1173, |
|
"eval_precisions_3": 5.6963, |
|
"eval_precisions_4": 2.7208, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2689, |
|
"eval_rouge2": 0.1102, |
|
"eval_rougeL": 0.261, |
|
"eval_rougeLsum": 0.2611, |
|
"eval_runtime": 2869.1423, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.384, |
|
"eval_sys_len": 17785, |
|
"eval_totals_1": 17785, |
|
"eval_totals_2": 15581, |
|
"eval_totals_3": 13377, |
|
"eval_totals_4": 11173, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 0.0001, |
|
"loss": 2.2026, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"eval_bleu": 7.1987, |
|
"eval_bp": 0.8159, |
|
"eval_counts_1": 6644, |
|
"eval_counts_2": 1853, |
|
"eval_counts_3": 720, |
|
"eval_counts_4": 273, |
|
"eval_exact_match": 0.0073, |
|
"eval_f1": 0.2742, |
|
"eval_gen_len": 13.6343, |
|
"eval_loss": 2.499704122543335, |
|
"eval_precisions_1": 37.626, |
|
"eval_precisions_2": 11.9904, |
|
"eval_precisions_3": 5.434, |
|
"eval_precisions_4": 2.4715, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2717, |
|
"eval_rouge2": 0.1097, |
|
"eval_rougeL": 0.2628, |
|
"eval_rougeLsum": 0.2625, |
|
"eval_runtime": 2871.4974, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.384, |
|
"eval_sys_len": 17658, |
|
"eval_totals_1": 17658, |
|
"eval_totals_2": 15454, |
|
"eval_totals_3": 13250, |
|
"eval_totals_4": 11046, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"step": 720, |
|
"total_flos": 8.496574887886848e+17, |
|
"train_loss": 3.782369862662421, |
|
"train_runtime": 109409.7996, |
|
"train_samples_per_second": 1.703, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 720, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8.496574887886848e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|