|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 181.8181818181818, |
|
"eval_steps": 500, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.95, |
|
"eval_gen_len": 13.1091, |
|
"eval_loss": 21.40757179260254, |
|
"eval_rouge1": 0.0745, |
|
"eval_rouge2": 0.0159, |
|
"eval_rougeL": 0.0584, |
|
"eval_rougeLsum": 0.0582, |
|
"eval_runtime": 11.5492, |
|
"eval_samples_per_second": 9.524, |
|
"eval_steps_per_second": 1.212, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_gen_len": 12.9636, |
|
"eval_loss": 21.254241943359375, |
|
"eval_rouge1": 0.0726, |
|
"eval_rouge2": 0.0153, |
|
"eval_rougeL": 0.0568, |
|
"eval_rougeLsum": 0.0569, |
|
"eval_runtime": 9.5424, |
|
"eval_samples_per_second": 11.528, |
|
"eval_steps_per_second": 1.467, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_gen_len": 12.9636, |
|
"eval_loss": 21.031164169311523, |
|
"eval_rouge1": 0.0724, |
|
"eval_rouge2": 0.0176, |
|
"eval_rougeL": 0.0568, |
|
"eval_rougeLsum": 0.0569, |
|
"eval_runtime": 9.5313, |
|
"eval_samples_per_second": 11.541, |
|
"eval_steps_per_second": 1.469, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 12.6727, |
|
"eval_loss": 20.743330001831055, |
|
"eval_rouge1": 0.0722, |
|
"eval_rouge2": 0.0175, |
|
"eval_rougeL": 0.055, |
|
"eval_rougeLsum": 0.0551, |
|
"eval_runtime": 9.5638, |
|
"eval_samples_per_second": 11.502, |
|
"eval_steps_per_second": 1.464, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_gen_len": 12.5273, |
|
"eval_loss": 20.430522918701172, |
|
"eval_rouge1": 0.0708, |
|
"eval_rouge2": 0.0177, |
|
"eval_rougeL": 0.0545, |
|
"eval_rougeLsum": 0.0547, |
|
"eval_runtime": 9.583, |
|
"eval_samples_per_second": 11.479, |
|
"eval_steps_per_second": 1.461, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_gen_len": 12.6091, |
|
"eval_loss": 20.10591697692871, |
|
"eval_rouge1": 0.0725, |
|
"eval_rouge2": 0.0185, |
|
"eval_rougeL": 0.0559, |
|
"eval_rougeLsum": 0.0559, |
|
"eval_runtime": 9.5892, |
|
"eval_samples_per_second": 11.471, |
|
"eval_steps_per_second": 1.46, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_gen_len": 12.6091, |
|
"eval_loss": 19.850391387939453, |
|
"eval_rouge1": 0.0727, |
|
"eval_rouge2": 0.0169, |
|
"eval_rougeL": 0.0551, |
|
"eval_rougeLsum": 0.0547, |
|
"eval_runtime": 9.6928, |
|
"eval_samples_per_second": 11.349, |
|
"eval_steps_per_second": 1.444, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 13.2909, |
|
"eval_loss": 19.628494262695312, |
|
"eval_rouge1": 0.0816, |
|
"eval_rouge2": 0.0228, |
|
"eval_rougeL": 0.062, |
|
"eval_rougeLsum": 0.0622, |
|
"eval_runtime": 9.7244, |
|
"eval_samples_per_second": 11.312, |
|
"eval_steps_per_second": 1.44, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"eval_gen_len": 13.2909, |
|
"eval_loss": 19.41258430480957, |
|
"eval_rouge1": 0.0811, |
|
"eval_rouge2": 0.0214, |
|
"eval_rougeL": 0.0613, |
|
"eval_rougeLsum": 0.0614, |
|
"eval_runtime": 9.5996, |
|
"eval_samples_per_second": 11.459, |
|
"eval_steps_per_second": 1.458, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"eval_gen_len": 13.5182, |
|
"eval_loss": 19.160032272338867, |
|
"eval_rouge1": 0.0818, |
|
"eval_rouge2": 0.0208, |
|
"eval_rougeL": 0.0632, |
|
"eval_rougeLsum": 0.0633, |
|
"eval_runtime": 9.5959, |
|
"eval_samples_per_second": 11.463, |
|
"eval_steps_per_second": 1.459, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_gen_len": 13.8909, |
|
"eval_loss": 18.8905086517334, |
|
"eval_rouge1": 0.09, |
|
"eval_rouge2": 0.024, |
|
"eval_rougeL": 0.0698, |
|
"eval_rougeLsum": 0.0696, |
|
"eval_runtime": 9.6532, |
|
"eval_samples_per_second": 11.395, |
|
"eval_steps_per_second": 1.45, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 14.1818, |
|
"eval_loss": 18.593591690063477, |
|
"eval_rouge1": 0.094, |
|
"eval_rouge2": 0.0324, |
|
"eval_rougeL": 0.0735, |
|
"eval_rougeLsum": 0.0732, |
|
"eval_runtime": 9.6893, |
|
"eval_samples_per_second": 11.353, |
|
"eval_steps_per_second": 1.445, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"eval_gen_len": 14.4364, |
|
"eval_loss": 18.286243438720703, |
|
"eval_rouge1": 0.0928, |
|
"eval_rouge2": 0.0329, |
|
"eval_rougeL": 0.0746, |
|
"eval_rougeLsum": 0.0749, |
|
"eval_runtime": 9.7163, |
|
"eval_samples_per_second": 11.321, |
|
"eval_steps_per_second": 1.441, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"eval_gen_len": 15.0727, |
|
"eval_loss": 17.896913528442383, |
|
"eval_rouge1": 0.096, |
|
"eval_rouge2": 0.0328, |
|
"eval_rougeL": 0.0788, |
|
"eval_rougeLsum": 0.0792, |
|
"eval_runtime": 9.6929, |
|
"eval_samples_per_second": 11.348, |
|
"eval_steps_per_second": 1.444, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_gen_len": 15.7364, |
|
"eval_loss": 17.442358016967773, |
|
"eval_rouge1": 0.1015, |
|
"eval_rouge2": 0.0334, |
|
"eval_rougeL": 0.0816, |
|
"eval_rougeLsum": 0.0818, |
|
"eval_runtime": 9.6767, |
|
"eval_samples_per_second": 11.368, |
|
"eval_steps_per_second": 1.447, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_gen_len": 16.2364, |
|
"eval_loss": 16.91552734375, |
|
"eval_rouge1": 0.1001, |
|
"eval_rouge2": 0.0337, |
|
"eval_rougeL": 0.0811, |
|
"eval_rougeLsum": 0.0814, |
|
"eval_runtime": 9.5645, |
|
"eval_samples_per_second": 11.501, |
|
"eval_steps_per_second": 1.464, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_gen_len": 16.6818, |
|
"eval_loss": 16.372838973999023, |
|
"eval_rouge1": 0.102, |
|
"eval_rouge2": 0.0336, |
|
"eval_rougeL": 0.081, |
|
"eval_rougeLsum": 0.0809, |
|
"eval_runtime": 9.7488, |
|
"eval_samples_per_second": 11.283, |
|
"eval_steps_per_second": 1.436, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 17.96, |
|
"eval_gen_len": 16.5364, |
|
"eval_loss": 15.727865219116211, |
|
"eval_rouge1": 0.0901, |
|
"eval_rouge2": 0.0301, |
|
"eval_rougeL": 0.0739, |
|
"eval_rougeLsum": 0.0741, |
|
"eval_runtime": 9.6417, |
|
"eval_samples_per_second": 11.409, |
|
"eval_steps_per_second": 1.452, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_gen_len": 16.9818, |
|
"eval_loss": 14.980103492736816, |
|
"eval_rouge1": 0.0828, |
|
"eval_rouge2": 0.0258, |
|
"eval_rougeL": 0.0673, |
|
"eval_rougeLsum": 0.0671, |
|
"eval_runtime": 9.6417, |
|
"eval_samples_per_second": 11.409, |
|
"eval_steps_per_second": 1.452, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_gen_len": 16.4727, |
|
"eval_loss": 14.11096477508545, |
|
"eval_rouge1": 0.0687, |
|
"eval_rouge2": 0.0146, |
|
"eval_rougeL": 0.0549, |
|
"eval_rougeLsum": 0.0547, |
|
"eval_runtime": 9.556, |
|
"eval_samples_per_second": 11.511, |
|
"eval_steps_per_second": 1.465, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 20.95, |
|
"eval_gen_len": 14.2364, |
|
"eval_loss": 13.337847709655762, |
|
"eval_rouge1": 0.051, |
|
"eval_rouge2": 0.0102, |
|
"eval_rougeL": 0.0418, |
|
"eval_rougeLsum": 0.0414, |
|
"eval_runtime": 9.7157, |
|
"eval_samples_per_second": 11.322, |
|
"eval_steps_per_second": 1.441, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"eval_gen_len": 12.3818, |
|
"eval_loss": 12.585112571716309, |
|
"eval_rouge1": 0.0377, |
|
"eval_rouge2": 0.007, |
|
"eval_rougeL": 0.0317, |
|
"eval_rougeLsum": 0.0317, |
|
"eval_runtime": 9.5457, |
|
"eval_samples_per_second": 11.524, |
|
"eval_steps_per_second": 1.467, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 22.98, |
|
"eval_gen_len": 12.0455, |
|
"eval_loss": 11.860977172851562, |
|
"eval_rouge1": 0.023, |
|
"eval_rouge2": 0.0048, |
|
"eval_rougeL": 0.0204, |
|
"eval_rougeLsum": 0.0204, |
|
"eval_runtime": 9.5524, |
|
"eval_samples_per_second": 11.515, |
|
"eval_steps_per_second": 1.466, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_gen_len": 10.8545, |
|
"eval_loss": 11.155168533325195, |
|
"eval_rouge1": 0.012, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0108, |
|
"eval_rougeLsum": 0.0108, |
|
"eval_runtime": 9.5834, |
|
"eval_samples_per_second": 11.478, |
|
"eval_steps_per_second": 1.461, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"eval_gen_len": 10.0273, |
|
"eval_loss": 10.512735366821289, |
|
"eval_rouge1": 0.0037, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0035, |
|
"eval_rougeLsum": 0.0036, |
|
"eval_runtime": 9.613, |
|
"eval_samples_per_second": 11.443, |
|
"eval_steps_per_second": 1.456, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 25.96, |
|
"eval_gen_len": 11.6, |
|
"eval_loss": 9.83348560333252, |
|
"eval_rouge1": 0.0039, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0038, |
|
"eval_rougeLsum": 0.0039, |
|
"eval_runtime": 9.5253, |
|
"eval_samples_per_second": 11.548, |
|
"eval_steps_per_second": 1.47, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 26.98, |
|
"eval_gen_len": 13.0455, |
|
"eval_loss": 9.216172218322754, |
|
"eval_rouge1": 0.0016, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0016, |
|
"eval_rougeLsum": 0.0016, |
|
"eval_runtime": 9.5441, |
|
"eval_samples_per_second": 11.525, |
|
"eval_steps_per_second": 1.467, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_gen_len": 14.6818, |
|
"eval_loss": 8.572382926940918, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.6018, |
|
"eval_samples_per_second": 11.456, |
|
"eval_steps_per_second": 1.458, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 28.95, |
|
"eval_gen_len": 15.2727, |
|
"eval_loss": 8.037731170654297, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0009, |
|
"eval_rougeLsum": 0.0009, |
|
"eval_runtime": 9.5646, |
|
"eval_samples_per_second": 11.501, |
|
"eval_steps_per_second": 1.464, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 29.96, |
|
"eval_gen_len": 16.3909, |
|
"eval_loss": 7.415070056915283, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.5548, |
|
"eval_samples_per_second": 11.513, |
|
"eval_steps_per_second": 1.465, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 30.98, |
|
"eval_gen_len": 17.8364, |
|
"eval_loss": 6.802865028381348, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5629, |
|
"eval_samples_per_second": 11.503, |
|
"eval_steps_per_second": 1.464, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_gen_len": 18.2818, |
|
"eval_loss": 6.211207866668701, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5163, |
|
"eval_samples_per_second": 11.559, |
|
"eval_steps_per_second": 1.471, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 32.95, |
|
"eval_gen_len": 18.7091, |
|
"eval_loss": 5.717328071594238, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.505, |
|
"eval_samples_per_second": 11.573, |
|
"eval_steps_per_second": 1.473, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"eval_gen_len": 18.7091, |
|
"eval_loss": 5.17288064956665, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.5443, |
|
"eval_samples_per_second": 11.525, |
|
"eval_steps_per_second": 1.467, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 34.98, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 4.669548034667969, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.4845, |
|
"eval_samples_per_second": 11.598, |
|
"eval_steps_per_second": 1.476, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 4.216309547424316, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.5515, |
|
"eval_samples_per_second": 11.516, |
|
"eval_steps_per_second": 1.466, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"grad_norm": 5.376431465148926, |
|
"learning_rate": 1.660854700854701e-05, |
|
"loss": 14.4939, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 36.95, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.8450570106506348, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5449, |
|
"eval_samples_per_second": 11.525, |
|
"eval_steps_per_second": 1.467, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 37.96, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.507812261581421, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.5902, |
|
"eval_samples_per_second": 11.47, |
|
"eval_steps_per_second": 1.46, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 38.98, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.231505870819092, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5074, |
|
"eval_samples_per_second": 11.57, |
|
"eval_steps_per_second": 1.473, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.9967288970947266, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5195, |
|
"eval_samples_per_second": 11.555, |
|
"eval_steps_per_second": 1.471, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 40.95, |
|
"eval_gen_len": 18.9545, |
|
"eval_loss": 2.8089849948883057, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5224, |
|
"eval_samples_per_second": 11.552, |
|
"eval_steps_per_second": 1.47, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"eval_gen_len": 18.3727, |
|
"eval_loss": 2.6384663581848145, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.5129, |
|
"eval_samples_per_second": 11.563, |
|
"eval_steps_per_second": 1.472, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 42.98, |
|
"eval_gen_len": 16.7273, |
|
"eval_loss": 2.492598295211792, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.5174, |
|
"eval_samples_per_second": 11.558, |
|
"eval_steps_per_second": 1.471, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_gen_len": 11.9636, |
|
"eval_loss": 2.3677501678466797, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.5796, |
|
"eval_samples_per_second": 11.483, |
|
"eval_steps_per_second": 1.461, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 44.95, |
|
"eval_gen_len": 9.2455, |
|
"eval_loss": 2.2777955532073975, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6685, |
|
"eval_samples_per_second": 11.377, |
|
"eval_steps_per_second": 1.448, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 45.96, |
|
"eval_gen_len": 7.9455, |
|
"eval_loss": 2.198147773742676, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.585, |
|
"eval_samples_per_second": 11.476, |
|
"eval_steps_per_second": 1.461, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 46.98, |
|
"eval_gen_len": 7.5909, |
|
"eval_loss": 2.1306562423706055, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6008, |
|
"eval_samples_per_second": 11.457, |
|
"eval_steps_per_second": 1.458, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_gen_len": 7.4091, |
|
"eval_loss": 2.0773117542266846, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.6506, |
|
"eval_samples_per_second": 11.398, |
|
"eval_steps_per_second": 1.451, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 48.95, |
|
"eval_gen_len": 7.2909, |
|
"eval_loss": 2.036808729171753, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5097, |
|
"eval_samples_per_second": 11.567, |
|
"eval_steps_per_second": 1.472, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 49.96, |
|
"eval_gen_len": 6.8364, |
|
"eval_loss": 1.9949748516082764, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6031, |
|
"eval_samples_per_second": 11.455, |
|
"eval_steps_per_second": 1.458, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 50.98, |
|
"eval_gen_len": 7.8273, |
|
"eval_loss": 1.957520842552185, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.5071, |
|
"eval_samples_per_second": 11.57, |
|
"eval_steps_per_second": 1.473, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_gen_len": 7.5545, |
|
"eval_loss": 1.9219788312911987, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5836, |
|
"eval_samples_per_second": 11.478, |
|
"eval_steps_per_second": 1.461, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 52.95, |
|
"eval_gen_len": 7.5364, |
|
"eval_loss": 1.8916202783584595, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6019, |
|
"eval_samples_per_second": 11.456, |
|
"eval_steps_per_second": 1.458, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 53.96, |
|
"eval_gen_len": 7.1182, |
|
"eval_loss": 1.8674402236938477, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5024, |
|
"eval_samples_per_second": 11.576, |
|
"eval_steps_per_second": 1.473, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 54.98, |
|
"eval_gen_len": 7.0364, |
|
"eval_loss": 1.846158742904663, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6139, |
|
"eval_samples_per_second": 11.442, |
|
"eval_steps_per_second": 1.456, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_gen_len": 7.0, |
|
"eval_loss": 1.827086329460144, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.591, |
|
"eval_samples_per_second": 11.469, |
|
"eval_steps_per_second": 1.46, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 56.95, |
|
"eval_gen_len": 7.5455, |
|
"eval_loss": 1.8088210821151733, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.572, |
|
"eval_samples_per_second": 11.492, |
|
"eval_steps_per_second": 1.463, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 57.96, |
|
"eval_gen_len": 7.9, |
|
"eval_loss": 1.789602279663086, |
|
"eval_rouge1": 0.0001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0001, |
|
"eval_rougeLsum": 0.0001, |
|
"eval_runtime": 9.5837, |
|
"eval_samples_per_second": 11.478, |
|
"eval_steps_per_second": 1.461, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 58.98, |
|
"eval_gen_len": 8.2545, |
|
"eval_loss": 1.7687013149261475, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.6152, |
|
"eval_samples_per_second": 11.44, |
|
"eval_steps_per_second": 1.456, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_gen_len": 8.3636, |
|
"eval_loss": 1.7496564388275146, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4902, |
|
"eval_samples_per_second": 11.591, |
|
"eval_steps_per_second": 1.475, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 60.95, |
|
"eval_gen_len": 9.1455, |
|
"eval_loss": 1.7332907915115356, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.5867, |
|
"eval_samples_per_second": 11.474, |
|
"eval_steps_per_second": 1.46, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 61.96, |
|
"eval_gen_len": 8.9, |
|
"eval_loss": 1.7185932397842407, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4797, |
|
"eval_samples_per_second": 11.604, |
|
"eval_steps_per_second": 1.477, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 62.98, |
|
"eval_gen_len": 9.7545, |
|
"eval_loss": 1.7047526836395264, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.618, |
|
"eval_samples_per_second": 11.437, |
|
"eval_steps_per_second": 1.456, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_gen_len": 9.9818, |
|
"eval_loss": 1.6921414136886597, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6264, |
|
"eval_samples_per_second": 11.427, |
|
"eval_steps_per_second": 1.454, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 64.95, |
|
"eval_gen_len": 9.6909, |
|
"eval_loss": 1.6816327571868896, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5134, |
|
"eval_samples_per_second": 11.563, |
|
"eval_steps_per_second": 1.472, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 65.96, |
|
"eval_gen_len": 8.9545, |
|
"eval_loss": 1.6697723865509033, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5506, |
|
"eval_samples_per_second": 11.518, |
|
"eval_steps_per_second": 1.466, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"eval_gen_len": 9.6818, |
|
"eval_loss": 1.6568113565444946, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5388, |
|
"eval_samples_per_second": 11.532, |
|
"eval_steps_per_second": 1.468, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_gen_len": 9.9455, |
|
"eval_loss": 1.6469463109970093, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4825, |
|
"eval_samples_per_second": 11.6, |
|
"eval_steps_per_second": 1.476, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 68.95, |
|
"eval_gen_len": 9.3545, |
|
"eval_loss": 1.6408612728118896, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.5212, |
|
"eval_samples_per_second": 11.553, |
|
"eval_steps_per_second": 1.47, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 69.96, |
|
"eval_gen_len": 9.1545, |
|
"eval_loss": 1.6316603422164917, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4986, |
|
"eval_samples_per_second": 11.581, |
|
"eval_steps_per_second": 1.474, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 70.98, |
|
"eval_gen_len": 9.7818, |
|
"eval_loss": 1.623169183731079, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5276, |
|
"eval_samples_per_second": 11.545, |
|
"eval_steps_per_second": 1.469, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_gen_len": 10.0273, |
|
"eval_loss": 1.6152759790420532, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5801, |
|
"eval_samples_per_second": 11.482, |
|
"eval_steps_per_second": 1.461, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"grad_norm": 3.344996213912964, |
|
"learning_rate": 1.3196581196581197e-05, |
|
"loss": 2.6089, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 72.95, |
|
"eval_gen_len": 9.1727, |
|
"eval_loss": 1.6071548461914062, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.6124, |
|
"eval_samples_per_second": 11.444, |
|
"eval_steps_per_second": 1.456, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 73.96, |
|
"eval_gen_len": 9.1545, |
|
"eval_loss": 1.599768042564392, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.498, |
|
"eval_samples_per_second": 11.581, |
|
"eval_steps_per_second": 1.474, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 74.98, |
|
"eval_gen_len": 9.5273, |
|
"eval_loss": 1.5934444665908813, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5664, |
|
"eval_samples_per_second": 11.499, |
|
"eval_steps_per_second": 1.463, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_gen_len": 10.4091, |
|
"eval_loss": 1.5867650508880615, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.579, |
|
"eval_samples_per_second": 11.483, |
|
"eval_steps_per_second": 1.462, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 76.95, |
|
"eval_gen_len": 10.3, |
|
"eval_loss": 1.5827070474624634, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.4875, |
|
"eval_samples_per_second": 11.594, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 77.96, |
|
"eval_gen_len": 9.6182, |
|
"eval_loss": 1.5748482942581177, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.4782, |
|
"eval_samples_per_second": 11.606, |
|
"eval_steps_per_second": 1.477, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 78.98, |
|
"eval_gen_len": 9.6273, |
|
"eval_loss": 1.5662298202514648, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.4778, |
|
"eval_samples_per_second": 11.606, |
|
"eval_steps_per_second": 1.477, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_gen_len": 9.5273, |
|
"eval_loss": 1.5585095882415771, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.4851, |
|
"eval_samples_per_second": 11.597, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 80.95, |
|
"eval_gen_len": 10.1364, |
|
"eval_loss": 1.5539778470993042, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6048, |
|
"eval_samples_per_second": 11.453, |
|
"eval_steps_per_second": 1.458, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 81.96, |
|
"eval_gen_len": 9.6182, |
|
"eval_loss": 1.541092038154602, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4829, |
|
"eval_samples_per_second": 11.6, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 82.98, |
|
"eval_gen_len": 9.6091, |
|
"eval_loss": 1.5330486297607422, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5558, |
|
"eval_samples_per_second": 11.511, |
|
"eval_steps_per_second": 1.465, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_gen_len": 9.0818, |
|
"eval_loss": 1.5241070985794067, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6145, |
|
"eval_samples_per_second": 11.441, |
|
"eval_steps_per_second": 1.456, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 84.95, |
|
"eval_gen_len": 9.3, |
|
"eval_loss": 1.5192241668701172, |
|
"eval_rouge1": 0.0004, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0004, |
|
"eval_rougeLsum": 0.0004, |
|
"eval_runtime": 9.4781, |
|
"eval_samples_per_second": 11.606, |
|
"eval_steps_per_second": 1.477, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 85.96, |
|
"eval_gen_len": 9.5364, |
|
"eval_loss": 1.5117179155349731, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5291, |
|
"eval_samples_per_second": 11.544, |
|
"eval_steps_per_second": 1.469, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 86.98, |
|
"eval_gen_len": 9.4545, |
|
"eval_loss": 1.4990766048431396, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.6213, |
|
"eval_samples_per_second": 11.433, |
|
"eval_steps_per_second": 1.455, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_gen_len": 9.4182, |
|
"eval_loss": 1.4878661632537842, |
|
"eval_rouge1": 0.0011, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.4886, |
|
"eval_samples_per_second": 11.593, |
|
"eval_steps_per_second": 1.475, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 88.95, |
|
"eval_gen_len": 9.5727, |
|
"eval_loss": 1.479432463645935, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.5867, |
|
"eval_samples_per_second": 11.474, |
|
"eval_steps_per_second": 1.46, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 89.96, |
|
"eval_gen_len": 9.3909, |
|
"eval_loss": 1.471197247505188, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6139, |
|
"eval_samples_per_second": 11.442, |
|
"eval_steps_per_second": 1.456, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 90.98, |
|
"eval_gen_len": 9.5, |
|
"eval_loss": 1.4649511575698853, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.5947, |
|
"eval_samples_per_second": 11.465, |
|
"eval_steps_per_second": 1.459, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_gen_len": 9.5273, |
|
"eval_loss": 1.4548052549362183, |
|
"eval_rouge1": 0.0016, |
|
"eval_rouge2": 0.0004, |
|
"eval_rougeL": 0.0016, |
|
"eval_rougeLsum": 0.0017, |
|
"eval_runtime": 9.6868, |
|
"eval_samples_per_second": 11.356, |
|
"eval_steps_per_second": 1.445, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 92.95, |
|
"eval_gen_len": 9.3727, |
|
"eval_loss": 1.444838285446167, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5071, |
|
"eval_samples_per_second": 11.57, |
|
"eval_steps_per_second": 1.473, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 93.96, |
|
"eval_gen_len": 9.6, |
|
"eval_loss": 1.4365838766098022, |
|
"eval_rouge1": 0.0014, |
|
"eval_rouge2": 0.0004, |
|
"eval_rougeL": 0.0014, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 9.5507, |
|
"eval_samples_per_second": 11.517, |
|
"eval_steps_per_second": 1.466, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 94.98, |
|
"eval_gen_len": 9.3364, |
|
"eval_loss": 1.4285393953323364, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.6323, |
|
"eval_samples_per_second": 11.42, |
|
"eval_steps_per_second": 1.453, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_gen_len": 9.3455, |
|
"eval_loss": 1.4242411851882935, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.5163, |
|
"eval_samples_per_second": 11.559, |
|
"eval_steps_per_second": 1.471, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 96.95, |
|
"eval_gen_len": 9.4, |
|
"eval_loss": 1.4160754680633545, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.5868, |
|
"eval_samples_per_second": 11.474, |
|
"eval_steps_per_second": 1.46, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 97.96, |
|
"eval_gen_len": 9.4455, |
|
"eval_loss": 1.4052343368530273, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.6109, |
|
"eval_samples_per_second": 11.445, |
|
"eval_steps_per_second": 1.457, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 98.98, |
|
"eval_gen_len": 9.5273, |
|
"eval_loss": 1.3928413391113281, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.6591, |
|
"eval_samples_per_second": 11.388, |
|
"eval_steps_per_second": 1.449, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_gen_len": 9.5182, |
|
"eval_loss": 1.3840864896774292, |
|
"eval_rouge1": 0.0011, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0011, |
|
"eval_rougeLsum": 0.0011, |
|
"eval_runtime": 9.6661, |
|
"eval_samples_per_second": 11.38, |
|
"eval_steps_per_second": 1.448, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 100.95, |
|
"eval_gen_len": 9.3, |
|
"eval_loss": 1.381872296333313, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0001, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.6513, |
|
"eval_samples_per_second": 11.397, |
|
"eval_steps_per_second": 1.451, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 101.96, |
|
"eval_gen_len": 9.3455, |
|
"eval_loss": 1.37588632106781, |
|
"eval_rouge1": 0.0004, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0004, |
|
"eval_rougeLsum": 0.0004, |
|
"eval_runtime": 9.5607, |
|
"eval_samples_per_second": 11.505, |
|
"eval_steps_per_second": 1.464, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 102.98, |
|
"eval_gen_len": 9.2636, |
|
"eval_loss": 1.3675447702407837, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5449, |
|
"eval_samples_per_second": 11.524, |
|
"eval_steps_per_second": 1.467, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_gen_len": 9.2455, |
|
"eval_loss": 1.358955979347229, |
|
"eval_rouge1": 0.0012, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0012, |
|
"eval_rougeLsum": 0.0012, |
|
"eval_runtime": 9.5868, |
|
"eval_samples_per_second": 11.474, |
|
"eval_steps_per_second": 1.46, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 104.95, |
|
"eval_gen_len": 9.1455, |
|
"eval_loss": 1.3501193523406982, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.6684, |
|
"eval_samples_per_second": 11.377, |
|
"eval_steps_per_second": 1.448, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 105.96, |
|
"eval_gen_len": 9.1636, |
|
"eval_loss": 1.344258189201355, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.5388, |
|
"eval_samples_per_second": 11.532, |
|
"eval_steps_per_second": 1.468, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 106.98, |
|
"eval_gen_len": 9.8, |
|
"eval_loss": 1.3355817794799805, |
|
"eval_rouge1": 0.001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.5263, |
|
"eval_samples_per_second": 11.547, |
|
"eval_steps_per_second": 1.47, |
|
"step": 1471 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_gen_len": 9.9182, |
|
"eval_loss": 1.3305474519729614, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.6315, |
|
"eval_samples_per_second": 11.421, |
|
"eval_steps_per_second": 1.454, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 108.95, |
|
"eval_gen_len": 10.1636, |
|
"eval_loss": 1.3212946653366089, |
|
"eval_rouge1": 0.0028, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0025, |
|
"eval_rougeLsum": 0.0026, |
|
"eval_runtime": 9.5797, |
|
"eval_samples_per_second": 11.483, |
|
"eval_steps_per_second": 1.461, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 109.09, |
|
"grad_norm": 5.480063438415527, |
|
"learning_rate": 9.784615384615387e-06, |
|
"loss": 1.7753, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 109.96, |
|
"eval_gen_len": 9.9091, |
|
"eval_loss": 1.31065833568573, |
|
"eval_rouge1": 0.0019, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0015, |
|
"eval_rougeLsum": 0.0016, |
|
"eval_runtime": 9.57, |
|
"eval_samples_per_second": 11.494, |
|
"eval_steps_per_second": 1.463, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 110.98, |
|
"eval_gen_len": 10.2, |
|
"eval_loss": 1.301637053489685, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0013, |
|
"eval_rougeLsum": 0.0013, |
|
"eval_runtime": 9.5009, |
|
"eval_samples_per_second": 11.578, |
|
"eval_steps_per_second": 1.474, |
|
"step": 1526 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_gen_len": 9.7091, |
|
"eval_loss": 1.2922732830047607, |
|
"eval_rouge1": 0.0014, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0013, |
|
"eval_rougeLsum": 0.0014, |
|
"eval_runtime": 9.5171, |
|
"eval_samples_per_second": 11.558, |
|
"eval_steps_per_second": 1.471, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 112.95, |
|
"eval_gen_len": 9.6273, |
|
"eval_loss": 1.2817051410675049, |
|
"eval_rouge1": 0.001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.001, |
|
"eval_rougeLsum": 0.001, |
|
"eval_runtime": 9.5296, |
|
"eval_samples_per_second": 11.543, |
|
"eval_steps_per_second": 1.469, |
|
"step": 1553 |
|
}, |
|
{ |
|
"epoch": 113.96, |
|
"eval_gen_len": 9.9818, |
|
"eval_loss": 1.2726068496704102, |
|
"eval_rouge1": 0.001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.001, |
|
"eval_rougeLsum": 0.001, |
|
"eval_runtime": 9.6467, |
|
"eval_samples_per_second": 11.403, |
|
"eval_steps_per_second": 1.451, |
|
"step": 1567 |
|
}, |
|
{ |
|
"epoch": 114.98, |
|
"eval_gen_len": 9.5273, |
|
"eval_loss": 1.2626111507415771, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.5006, |
|
"eval_samples_per_second": 11.578, |
|
"eval_steps_per_second": 1.474, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_gen_len": 9.8, |
|
"eval_loss": 1.2526050806045532, |
|
"eval_rouge1": 0.0026, |
|
"eval_rouge2": 0.0006, |
|
"eval_rougeL": 0.0019, |
|
"eval_rougeLsum": 0.0019, |
|
"eval_runtime": 9.5945, |
|
"eval_samples_per_second": 11.465, |
|
"eval_steps_per_second": 1.459, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 116.95, |
|
"eval_gen_len": 9.6182, |
|
"eval_loss": 1.2438002824783325, |
|
"eval_rouge1": 0.0012, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0012, |
|
"eval_rougeLsum": 0.0012, |
|
"eval_runtime": 9.582, |
|
"eval_samples_per_second": 11.48, |
|
"eval_steps_per_second": 1.461, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 117.96, |
|
"eval_gen_len": 9.5727, |
|
"eval_loss": 1.235589861869812, |
|
"eval_rouge1": 0.0016, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0013, |
|
"eval_rougeLsum": 0.0013, |
|
"eval_runtime": 9.5074, |
|
"eval_samples_per_second": 11.57, |
|
"eval_steps_per_second": 1.473, |
|
"step": 1622 |
|
}, |
|
{ |
|
"epoch": 118.98, |
|
"eval_gen_len": 9.5727, |
|
"eval_loss": 1.2265100479125977, |
|
"eval_rouge1": 0.0017, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0017, |
|
"eval_rougeLsum": 0.0017, |
|
"eval_runtime": 9.6164, |
|
"eval_samples_per_second": 11.439, |
|
"eval_steps_per_second": 1.456, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_gen_len": 9.7182, |
|
"eval_loss": 1.21653151512146, |
|
"eval_rouge1": 0.0013, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0011, |
|
"eval_rougeLsum": 0.0011, |
|
"eval_runtime": 9.5387, |
|
"eval_samples_per_second": 11.532, |
|
"eval_steps_per_second": 1.468, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 120.95, |
|
"eval_gen_len": 9.7182, |
|
"eval_loss": 1.208518624305725, |
|
"eval_rouge1": 0.0018, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0014, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 9.6008, |
|
"eval_samples_per_second": 11.457, |
|
"eval_steps_per_second": 1.458, |
|
"step": 1663 |
|
}, |
|
{ |
|
"epoch": 121.96, |
|
"eval_gen_len": 9.4182, |
|
"eval_loss": 1.1974164247512817, |
|
"eval_rouge1": 0.0024, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.0022, |
|
"eval_rougeLsum": 0.0022, |
|
"eval_runtime": 9.5182, |
|
"eval_samples_per_second": 11.557, |
|
"eval_steps_per_second": 1.471, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 122.98, |
|
"eval_gen_len": 9.3273, |
|
"eval_loss": 1.188578486442566, |
|
"eval_rouge1": 0.0013, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0013, |
|
"eval_rougeLsum": 0.0009, |
|
"eval_runtime": 9.5875, |
|
"eval_samples_per_second": 11.473, |
|
"eval_steps_per_second": 1.46, |
|
"step": 1691 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_gen_len": 9.3727, |
|
"eval_loss": 1.1796928644180298, |
|
"eval_rouge1": 0.0018, |
|
"eval_rouge2": 0.0006, |
|
"eval_rougeL": 0.0018, |
|
"eval_rougeLsum": 0.0018, |
|
"eval_runtime": 9.5197, |
|
"eval_samples_per_second": 11.555, |
|
"eval_steps_per_second": 1.471, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 124.95, |
|
"eval_gen_len": 9.4091, |
|
"eval_loss": 1.1718236207962036, |
|
"eval_rouge1": 0.0021, |
|
"eval_rouge2": 0.0006, |
|
"eval_rougeL": 0.0018, |
|
"eval_rougeLsum": 0.0018, |
|
"eval_runtime": 9.5197, |
|
"eval_samples_per_second": 11.555, |
|
"eval_steps_per_second": 1.471, |
|
"step": 1718 |
|
}, |
|
{ |
|
"epoch": 125.96, |
|
"eval_gen_len": 9.3364, |
|
"eval_loss": 1.1624401807785034, |
|
"eval_rouge1": 0.0026, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.0023, |
|
"eval_rougeLsum": 0.0023, |
|
"eval_runtime": 9.5347, |
|
"eval_samples_per_second": 11.537, |
|
"eval_steps_per_second": 1.468, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 126.98, |
|
"eval_gen_len": 9.6364, |
|
"eval_loss": 1.1539288759231567, |
|
"eval_rouge1": 0.0041, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0032, |
|
"eval_rougeLsum": 0.0032, |
|
"eval_runtime": 9.4835, |
|
"eval_samples_per_second": 11.599, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_gen_len": 9.5364, |
|
"eval_loss": 1.1447480916976929, |
|
"eval_rouge1": 0.0018, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0017, |
|
"eval_rougeLsum": 0.0016, |
|
"eval_runtime": 9.5263, |
|
"eval_samples_per_second": 11.547, |
|
"eval_steps_per_second": 1.47, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 128.95, |
|
"eval_gen_len": 9.4545, |
|
"eval_loss": 1.1359179019927979, |
|
"eval_rouge1": 0.0026, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0021, |
|
"eval_rougeLsum": 0.0021, |
|
"eval_runtime": 9.5577, |
|
"eval_samples_per_second": 11.509, |
|
"eval_steps_per_second": 1.465, |
|
"step": 1773 |
|
}, |
|
{ |
|
"epoch": 129.96, |
|
"eval_gen_len": 9.3636, |
|
"eval_loss": 1.1250239610671997, |
|
"eval_rouge1": 0.0029, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0026, |
|
"eval_rougeLsum": 0.0025, |
|
"eval_runtime": 9.5118, |
|
"eval_samples_per_second": 11.565, |
|
"eval_steps_per_second": 1.472, |
|
"step": 1787 |
|
}, |
|
{ |
|
"epoch": 130.98, |
|
"eval_gen_len": 9.4364, |
|
"eval_loss": 1.1156790256500244, |
|
"eval_rouge1": 0.0034, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0031, |
|
"eval_rougeLsum": 0.003, |
|
"eval_runtime": 9.4869, |
|
"eval_samples_per_second": 11.595, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1801 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_gen_len": 9.5182, |
|
"eval_loss": 1.1065722703933716, |
|
"eval_rouge1": 0.0029, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0027, |
|
"eval_rougeLsum": 0.0026, |
|
"eval_runtime": 9.6053, |
|
"eval_samples_per_second": 11.452, |
|
"eval_steps_per_second": 1.458, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 132.95, |
|
"eval_gen_len": 9.6636, |
|
"eval_loss": 1.0981847047805786, |
|
"eval_rouge1": 0.0032, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0034, |
|
"eval_rougeLsum": 0.0032, |
|
"eval_runtime": 9.6558, |
|
"eval_samples_per_second": 11.392, |
|
"eval_steps_per_second": 1.45, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 133.96, |
|
"eval_gen_len": 9.7273, |
|
"eval_loss": 1.090613603591919, |
|
"eval_rouge1": 0.0032, |
|
"eval_rouge2": 0.0006, |
|
"eval_rougeL": 0.0031, |
|
"eval_rougeLsum": 0.0029, |
|
"eval_runtime": 9.6549, |
|
"eval_samples_per_second": 11.393, |
|
"eval_steps_per_second": 1.45, |
|
"step": 1842 |
|
}, |
|
{ |
|
"epoch": 134.98, |
|
"eval_gen_len": 10.0818, |
|
"eval_loss": 1.0842803716659546, |
|
"eval_rouge1": 0.0061, |
|
"eval_rouge2": 0.0008, |
|
"eval_rougeL": 0.0052, |
|
"eval_rougeLsum": 0.0052, |
|
"eval_runtime": 9.4978, |
|
"eval_samples_per_second": 11.582, |
|
"eval_steps_per_second": 1.474, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_gen_len": 9.5818, |
|
"eval_loss": 1.075701117515564, |
|
"eval_rouge1": 0.0046, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0042, |
|
"eval_rougeLsum": 0.0042, |
|
"eval_runtime": 9.6101, |
|
"eval_samples_per_second": 11.446, |
|
"eval_steps_per_second": 1.457, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 136.95, |
|
"eval_gen_len": 9.8273, |
|
"eval_loss": 1.0663608312606812, |
|
"eval_rouge1": 0.0048, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0043, |
|
"eval_rougeLsum": 0.0043, |
|
"eval_runtime": 9.6501, |
|
"eval_samples_per_second": 11.399, |
|
"eval_steps_per_second": 1.451, |
|
"step": 1883 |
|
}, |
|
{ |
|
"epoch": 137.96, |
|
"eval_gen_len": 9.8545, |
|
"eval_loss": 1.0555903911590576, |
|
"eval_rouge1": 0.0055, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.0046, |
|
"eval_rougeLsum": 0.0045, |
|
"eval_runtime": 9.5822, |
|
"eval_samples_per_second": 11.48, |
|
"eval_steps_per_second": 1.461, |
|
"step": 1897 |
|
}, |
|
{ |
|
"epoch": 138.98, |
|
"eval_gen_len": 9.9182, |
|
"eval_loss": 1.0459803342819214, |
|
"eval_rouge1": 0.0067, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.0063, |
|
"eval_rougeLsum": 0.0062, |
|
"eval_runtime": 9.659, |
|
"eval_samples_per_second": 11.388, |
|
"eval_steps_per_second": 1.449, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_gen_len": 10.1636, |
|
"eval_loss": 1.0374290943145752, |
|
"eval_rouge1": 0.0067, |
|
"eval_rouge2": 0.0017, |
|
"eval_rougeL": 0.0062, |
|
"eval_rougeLsum": 0.0061, |
|
"eval_runtime": 9.5201, |
|
"eval_samples_per_second": 11.555, |
|
"eval_steps_per_second": 1.471, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 140.95, |
|
"eval_gen_len": 9.8909, |
|
"eval_loss": 1.0285921096801758, |
|
"eval_rouge1": 0.0037, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0034, |
|
"eval_rougeLsum": 0.0034, |
|
"eval_runtime": 9.5989, |
|
"eval_samples_per_second": 11.46, |
|
"eval_steps_per_second": 1.458, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 141.96, |
|
"eval_gen_len": 9.6455, |
|
"eval_loss": 1.0195242166519165, |
|
"eval_rouge1": 0.0033, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0028, |
|
"eval_rougeLsum": 0.0027, |
|
"eval_runtime": 9.5173, |
|
"eval_samples_per_second": 11.558, |
|
"eval_steps_per_second": 1.471, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 142.98, |
|
"eval_gen_len": 9.8182, |
|
"eval_loss": 1.0105475187301636, |
|
"eval_rouge1": 0.0031, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0028, |
|
"eval_rougeLsum": 0.0028, |
|
"eval_runtime": 9.5853, |
|
"eval_samples_per_second": 11.476, |
|
"eval_steps_per_second": 1.461, |
|
"step": 1966 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_gen_len": 9.5091, |
|
"eval_loss": 1.001856803894043, |
|
"eval_rouge1": 0.0025, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0022, |
|
"eval_rougeLsum": 0.002, |
|
"eval_runtime": 9.5828, |
|
"eval_samples_per_second": 11.479, |
|
"eval_steps_per_second": 1.461, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 144.95, |
|
"eval_gen_len": 9.6909, |
|
"eval_loss": 0.9937859773635864, |
|
"eval_rouge1": 0.0023, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0023, |
|
"eval_rougeLsum": 0.0023, |
|
"eval_runtime": 9.5054, |
|
"eval_samples_per_second": 11.572, |
|
"eval_steps_per_second": 1.473, |
|
"step": 1993 |
|
}, |
|
{ |
|
"epoch": 145.45, |
|
"grad_norm": 3.3901820182800293, |
|
"learning_rate": 6.365811965811967e-06, |
|
"loss": 1.4532, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 145.96, |
|
"eval_gen_len": 9.4182, |
|
"eval_loss": 0.985722541809082, |
|
"eval_rouge1": 0.0023, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0023, |
|
"eval_rougeLsum": 0.0023, |
|
"eval_runtime": 9.5248, |
|
"eval_samples_per_second": 11.549, |
|
"eval_steps_per_second": 1.47, |
|
"step": 2007 |
|
}, |
|
{ |
|
"epoch": 146.98, |
|
"eval_gen_len": 9.6818, |
|
"eval_loss": 0.978095531463623, |
|
"eval_rouge1": 0.0028, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0028, |
|
"eval_rougeLsum": 0.0028, |
|
"eval_runtime": 9.5123, |
|
"eval_samples_per_second": 11.564, |
|
"eval_steps_per_second": 1.472, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_gen_len": 9.7636, |
|
"eval_loss": 0.9693424105644226, |
|
"eval_rouge1": 0.0023, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0023, |
|
"eval_rougeLsum": 0.0023, |
|
"eval_runtime": 9.6266, |
|
"eval_samples_per_second": 11.427, |
|
"eval_steps_per_second": 1.454, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 148.95, |
|
"eval_gen_len": 9.7545, |
|
"eval_loss": 0.9614344835281372, |
|
"eval_rouge1": 0.0016, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0017, |
|
"eval_rougeLsum": 0.0016, |
|
"eval_runtime": 9.5312, |
|
"eval_samples_per_second": 11.541, |
|
"eval_steps_per_second": 1.469, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 149.96, |
|
"eval_gen_len": 9.7545, |
|
"eval_loss": 0.9523000121116638, |
|
"eval_rouge1": 0.0025, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0026, |
|
"eval_rougeLsum": 0.0028, |
|
"eval_runtime": 9.5972, |
|
"eval_samples_per_second": 11.462, |
|
"eval_steps_per_second": 1.459, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 150.98, |
|
"eval_gen_len": 9.7, |
|
"eval_loss": 0.9436053037643433, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0009, |
|
"eval_rougeLsum": 0.001, |
|
"eval_runtime": 9.5824, |
|
"eval_samples_per_second": 11.479, |
|
"eval_steps_per_second": 1.461, |
|
"step": 2076 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_gen_len": 9.8, |
|
"eval_loss": 0.9361644983291626, |
|
"eval_rouge1": 0.0024, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0024, |
|
"eval_rougeLsum": 0.0024, |
|
"eval_runtime": 9.5387, |
|
"eval_samples_per_second": 11.532, |
|
"eval_steps_per_second": 1.468, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 152.95, |
|
"eval_gen_len": 9.8091, |
|
"eval_loss": 0.9287785887718201, |
|
"eval_rouge1": 0.0028, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0028, |
|
"eval_rougeLsum": 0.0028, |
|
"eval_runtime": 9.5888, |
|
"eval_samples_per_second": 11.472, |
|
"eval_steps_per_second": 1.46, |
|
"step": 2103 |
|
}, |
|
{ |
|
"epoch": 153.96, |
|
"eval_gen_len": 9.9273, |
|
"eval_loss": 0.9205007553100586, |
|
"eval_rouge1": 0.0036, |
|
"eval_rouge2": 0.0013, |
|
"eval_rougeL": 0.0036, |
|
"eval_rougeLsum": 0.0036, |
|
"eval_runtime": 9.5824, |
|
"eval_samples_per_second": 11.479, |
|
"eval_steps_per_second": 1.461, |
|
"step": 2117 |
|
}, |
|
{ |
|
"epoch": 154.98, |
|
"eval_gen_len": 10.1, |
|
"eval_loss": 0.9119426608085632, |
|
"eval_rouge1": 0.0037, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0038, |
|
"eval_rougeLsum": 0.0037, |
|
"eval_runtime": 9.5407, |
|
"eval_samples_per_second": 11.53, |
|
"eval_steps_per_second": 1.467, |
|
"step": 2131 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_gen_len": 9.9364, |
|
"eval_loss": 0.9043306112289429, |
|
"eval_rouge1": 0.0034, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0034, |
|
"eval_rougeLsum": 0.0034, |
|
"eval_runtime": 9.5334, |
|
"eval_samples_per_second": 11.538, |
|
"eval_steps_per_second": 1.469, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 156.95, |
|
"eval_gen_len": 9.7818, |
|
"eval_loss": 0.8976907730102539, |
|
"eval_rouge1": 0.0033, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.003, |
|
"eval_rougeLsum": 0.0031, |
|
"eval_runtime": 9.5568, |
|
"eval_samples_per_second": 11.51, |
|
"eval_steps_per_second": 1.465, |
|
"step": 2158 |
|
}, |
|
{ |
|
"epoch": 157.96, |
|
"eval_gen_len": 9.7364, |
|
"eval_loss": 0.8908756971359253, |
|
"eval_rouge1": 0.0033, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.003, |
|
"eval_rougeLsum": 0.0031, |
|
"eval_runtime": 9.5534, |
|
"eval_samples_per_second": 11.514, |
|
"eval_steps_per_second": 1.465, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 158.98, |
|
"eval_gen_len": 9.6273, |
|
"eval_loss": 0.8828199505805969, |
|
"eval_rouge1": 0.0017, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.002, |
|
"eval_rougeLsum": 0.0019, |
|
"eval_runtime": 9.5064, |
|
"eval_samples_per_second": 11.571, |
|
"eval_steps_per_second": 1.473, |
|
"step": 2186 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_gen_len": 9.8273, |
|
"eval_loss": 0.8748722076416016, |
|
"eval_rouge1": 0.0045, |
|
"eval_rouge2": 0.0015, |
|
"eval_rougeL": 0.0036, |
|
"eval_rougeLsum": 0.0036, |
|
"eval_runtime": 9.6814, |
|
"eval_samples_per_second": 11.362, |
|
"eval_steps_per_second": 1.446, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 160.95, |
|
"eval_gen_len": 9.9455, |
|
"eval_loss": 0.8685693740844727, |
|
"eval_rouge1": 0.0061, |
|
"eval_rouge2": 0.0022, |
|
"eval_rougeL": 0.0057, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 9.4971, |
|
"eval_samples_per_second": 11.583, |
|
"eval_steps_per_second": 1.474, |
|
"step": 2213 |
|
}, |
|
{ |
|
"epoch": 161.96, |
|
"eval_gen_len": 9.9364, |
|
"eval_loss": 0.8622080087661743, |
|
"eval_rouge1": 0.0056, |
|
"eval_rouge2": 0.0017, |
|
"eval_rougeL": 0.005, |
|
"eval_rougeLsum": 0.0051, |
|
"eval_runtime": 9.5035, |
|
"eval_samples_per_second": 11.575, |
|
"eval_steps_per_second": 1.473, |
|
"step": 2227 |
|
}, |
|
{ |
|
"epoch": 162.98, |
|
"eval_gen_len": 9.8636, |
|
"eval_loss": 0.8555266261100769, |
|
"eval_rouge1": 0.0049, |
|
"eval_rouge2": 0.0016, |
|
"eval_rougeL": 0.0046, |
|
"eval_rougeLsum": 0.0047, |
|
"eval_runtime": 9.5934, |
|
"eval_samples_per_second": 11.466, |
|
"eval_steps_per_second": 1.459, |
|
"step": 2241 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_gen_len": 9.9455, |
|
"eval_loss": 0.8489392399787903, |
|
"eval_rouge1": 0.0065, |
|
"eval_rouge2": 0.0025, |
|
"eval_rougeL": 0.0062, |
|
"eval_rougeLsum": 0.0062, |
|
"eval_runtime": 9.588, |
|
"eval_samples_per_second": 11.473, |
|
"eval_steps_per_second": 1.46, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 164.95, |
|
"eval_gen_len": 10.1273, |
|
"eval_loss": 0.8434127569198608, |
|
"eval_rouge1": 0.0078, |
|
"eval_rouge2": 0.0022, |
|
"eval_rougeL": 0.0073, |
|
"eval_rougeLsum": 0.0073, |
|
"eval_runtime": 9.5692, |
|
"eval_samples_per_second": 11.495, |
|
"eval_steps_per_second": 1.463, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 165.96, |
|
"eval_gen_len": 10.1, |
|
"eval_loss": 0.8369239568710327, |
|
"eval_rouge1": 0.0072, |
|
"eval_rouge2": 0.0019, |
|
"eval_rougeL": 0.0068, |
|
"eval_rougeLsum": 0.0067, |
|
"eval_runtime": 9.5983, |
|
"eval_samples_per_second": 11.46, |
|
"eval_steps_per_second": 1.459, |
|
"step": 2282 |
|
}, |
|
{ |
|
"epoch": 166.98, |
|
"eval_gen_len": 10.0636, |
|
"eval_loss": 0.8303181529045105, |
|
"eval_rouge1": 0.0068, |
|
"eval_rouge2": 0.0022, |
|
"eval_rougeL": 0.0062, |
|
"eval_rougeLsum": 0.0061, |
|
"eval_runtime": 9.5017, |
|
"eval_samples_per_second": 11.577, |
|
"eval_steps_per_second": 1.473, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_gen_len": 9.9, |
|
"eval_loss": 0.8243575096130371, |
|
"eval_rouge1": 0.0044, |
|
"eval_rouge2": 0.0013, |
|
"eval_rougeL": 0.0039, |
|
"eval_rougeLsum": 0.004, |
|
"eval_runtime": 9.5856, |
|
"eval_samples_per_second": 11.476, |
|
"eval_steps_per_second": 1.461, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 168.95, |
|
"eval_gen_len": 9.9818, |
|
"eval_loss": 0.8190615773200989, |
|
"eval_rouge1": 0.0063, |
|
"eval_rouge2": 0.0022, |
|
"eval_rougeL": 0.0058, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 9.5114, |
|
"eval_samples_per_second": 11.565, |
|
"eval_steps_per_second": 1.472, |
|
"step": 2323 |
|
}, |
|
{ |
|
"epoch": 169.96, |
|
"eval_gen_len": 9.9818, |
|
"eval_loss": 0.8129807114601135, |
|
"eval_rouge1": 0.0054, |
|
"eval_rouge2": 0.0017, |
|
"eval_rougeL": 0.0047, |
|
"eval_rougeLsum": 0.0047, |
|
"eval_runtime": 9.625, |
|
"eval_samples_per_second": 11.429, |
|
"eval_steps_per_second": 1.455, |
|
"step": 2337 |
|
}, |
|
{ |
|
"epoch": 170.98, |
|
"eval_gen_len": 9.9727, |
|
"eval_loss": 0.8074091672897339, |
|
"eval_rouge1": 0.0059, |
|
"eval_rouge2": 0.0017, |
|
"eval_rougeL": 0.0052, |
|
"eval_rougeLsum": 0.0052, |
|
"eval_runtime": 9.5111, |
|
"eval_samples_per_second": 11.565, |
|
"eval_steps_per_second": 1.472, |
|
"step": 2351 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_gen_len": 9.9, |
|
"eval_loss": 0.8017935752868652, |
|
"eval_rouge1": 0.0045, |
|
"eval_rouge2": 0.0008, |
|
"eval_rougeL": 0.0038, |
|
"eval_rougeLsum": 0.0039, |
|
"eval_runtime": 9.5064, |
|
"eval_samples_per_second": 11.571, |
|
"eval_steps_per_second": 1.473, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 172.95, |
|
"eval_gen_len": 9.7091, |
|
"eval_loss": 0.7968164086341858, |
|
"eval_rouge1": 0.0031, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0027, |
|
"eval_rougeLsum": 0.0026, |
|
"eval_runtime": 9.5948, |
|
"eval_samples_per_second": 11.465, |
|
"eval_steps_per_second": 1.459, |
|
"step": 2378 |
|
}, |
|
{ |
|
"epoch": 173.96, |
|
"eval_gen_len": 9.7364, |
|
"eval_loss": 0.7917037010192871, |
|
"eval_rouge1": 0.0025, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0022, |
|
"eval_rougeLsum": 0.0022, |
|
"eval_runtime": 9.636, |
|
"eval_samples_per_second": 11.416, |
|
"eval_steps_per_second": 1.453, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 174.98, |
|
"eval_gen_len": 9.7455, |
|
"eval_loss": 0.787342369556427, |
|
"eval_rouge1": 0.0033, |
|
"eval_rouge2": 0.0016, |
|
"eval_rougeL": 0.0027, |
|
"eval_rougeLsum": 0.0027, |
|
"eval_runtime": 9.5895, |
|
"eval_samples_per_second": 11.471, |
|
"eval_steps_per_second": 1.46, |
|
"step": 2406 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_gen_len": 9.7, |
|
"eval_loss": 0.7829388976097107, |
|
"eval_rouge1": 0.0033, |
|
"eval_rouge2": 0.0016, |
|
"eval_rougeL": 0.0028, |
|
"eval_rougeLsum": 0.0028, |
|
"eval_runtime": 9.5727, |
|
"eval_samples_per_second": 11.491, |
|
"eval_steps_per_second": 1.462, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 176.95, |
|
"eval_gen_len": 9.6091, |
|
"eval_loss": 0.7783145904541016, |
|
"eval_rouge1": 0.0033, |
|
"eval_rouge2": 0.0016, |
|
"eval_rougeL": 0.0028, |
|
"eval_rougeLsum": 0.0028, |
|
"eval_runtime": 9.6033, |
|
"eval_samples_per_second": 11.454, |
|
"eval_steps_per_second": 1.458, |
|
"step": 2433 |
|
}, |
|
{ |
|
"epoch": 177.96, |
|
"eval_gen_len": 9.7091, |
|
"eval_loss": 0.7735804319381714, |
|
"eval_rouge1": 0.0033, |
|
"eval_rouge2": 0.0016, |
|
"eval_rougeL": 0.0028, |
|
"eval_rougeLsum": 0.0028, |
|
"eval_runtime": 9.7017, |
|
"eval_samples_per_second": 11.338, |
|
"eval_steps_per_second": 1.443, |
|
"step": 2447 |
|
}, |
|
{ |
|
"epoch": 178.98, |
|
"eval_gen_len": 9.6364, |
|
"eval_loss": 0.7691650986671448, |
|
"eval_rouge1": 0.0026, |
|
"eval_rouge2": 0.0016, |
|
"eval_rougeL": 0.0028, |
|
"eval_rougeLsum": 0.0028, |
|
"eval_runtime": 9.6163, |
|
"eval_samples_per_second": 11.439, |
|
"eval_steps_per_second": 1.456, |
|
"step": 2461 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_gen_len": 9.4818, |
|
"eval_loss": 0.7652955055236816, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5613, |
|
"eval_samples_per_second": 11.505, |
|
"eval_steps_per_second": 1.464, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 180.95, |
|
"eval_gen_len": 9.4818, |
|
"eval_loss": 0.7611756920814514, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6858, |
|
"eval_samples_per_second": 11.357, |
|
"eval_steps_per_second": 1.445, |
|
"step": 2488 |
|
}, |
|
{ |
|
"epoch": 181.82, |
|
"grad_norm": 1.818055272102356, |
|
"learning_rate": 2.9470085470085475e-06, |
|
"loss": 1.1581, |
|
"step": 2500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2925, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 225, |
|
"save_steps": 500, |
|
"total_flos": 9.676695641731891e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|