|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 109.0909090909091, |
|
"eval_steps": 500, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.95, |
|
"eval_gen_len": 13.1091, |
|
"eval_loss": 21.40757179260254, |
|
"eval_rouge1": 0.0745, |
|
"eval_rouge2": 0.0159, |
|
"eval_rougeL": 0.0584, |
|
"eval_rougeLsum": 0.0582, |
|
"eval_runtime": 11.5492, |
|
"eval_samples_per_second": 9.524, |
|
"eval_steps_per_second": 1.212, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_gen_len": 12.9636, |
|
"eval_loss": 21.254241943359375, |
|
"eval_rouge1": 0.0726, |
|
"eval_rouge2": 0.0153, |
|
"eval_rougeL": 0.0568, |
|
"eval_rougeLsum": 0.0569, |
|
"eval_runtime": 9.5424, |
|
"eval_samples_per_second": 11.528, |
|
"eval_steps_per_second": 1.467, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_gen_len": 12.9636, |
|
"eval_loss": 21.031164169311523, |
|
"eval_rouge1": 0.0724, |
|
"eval_rouge2": 0.0176, |
|
"eval_rougeL": 0.0568, |
|
"eval_rougeLsum": 0.0569, |
|
"eval_runtime": 9.5313, |
|
"eval_samples_per_second": 11.541, |
|
"eval_steps_per_second": 1.469, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 12.6727, |
|
"eval_loss": 20.743330001831055, |
|
"eval_rouge1": 0.0722, |
|
"eval_rouge2": 0.0175, |
|
"eval_rougeL": 0.055, |
|
"eval_rougeLsum": 0.0551, |
|
"eval_runtime": 9.5638, |
|
"eval_samples_per_second": 11.502, |
|
"eval_steps_per_second": 1.464, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_gen_len": 12.5273, |
|
"eval_loss": 20.430522918701172, |
|
"eval_rouge1": 0.0708, |
|
"eval_rouge2": 0.0177, |
|
"eval_rougeL": 0.0545, |
|
"eval_rougeLsum": 0.0547, |
|
"eval_runtime": 9.583, |
|
"eval_samples_per_second": 11.479, |
|
"eval_steps_per_second": 1.461, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_gen_len": 12.6091, |
|
"eval_loss": 20.10591697692871, |
|
"eval_rouge1": 0.0725, |
|
"eval_rouge2": 0.0185, |
|
"eval_rougeL": 0.0559, |
|
"eval_rougeLsum": 0.0559, |
|
"eval_runtime": 9.5892, |
|
"eval_samples_per_second": 11.471, |
|
"eval_steps_per_second": 1.46, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_gen_len": 12.6091, |
|
"eval_loss": 19.850391387939453, |
|
"eval_rouge1": 0.0727, |
|
"eval_rouge2": 0.0169, |
|
"eval_rougeL": 0.0551, |
|
"eval_rougeLsum": 0.0547, |
|
"eval_runtime": 9.6928, |
|
"eval_samples_per_second": 11.349, |
|
"eval_steps_per_second": 1.444, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 13.2909, |
|
"eval_loss": 19.628494262695312, |
|
"eval_rouge1": 0.0816, |
|
"eval_rouge2": 0.0228, |
|
"eval_rougeL": 0.062, |
|
"eval_rougeLsum": 0.0622, |
|
"eval_runtime": 9.7244, |
|
"eval_samples_per_second": 11.312, |
|
"eval_steps_per_second": 1.44, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"eval_gen_len": 13.2909, |
|
"eval_loss": 19.41258430480957, |
|
"eval_rouge1": 0.0811, |
|
"eval_rouge2": 0.0214, |
|
"eval_rougeL": 0.0613, |
|
"eval_rougeLsum": 0.0614, |
|
"eval_runtime": 9.5996, |
|
"eval_samples_per_second": 11.459, |
|
"eval_steps_per_second": 1.458, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"eval_gen_len": 13.5182, |
|
"eval_loss": 19.160032272338867, |
|
"eval_rouge1": 0.0818, |
|
"eval_rouge2": 0.0208, |
|
"eval_rougeL": 0.0632, |
|
"eval_rougeLsum": 0.0633, |
|
"eval_runtime": 9.5959, |
|
"eval_samples_per_second": 11.463, |
|
"eval_steps_per_second": 1.459, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_gen_len": 13.8909, |
|
"eval_loss": 18.8905086517334, |
|
"eval_rouge1": 0.09, |
|
"eval_rouge2": 0.024, |
|
"eval_rougeL": 0.0698, |
|
"eval_rougeLsum": 0.0696, |
|
"eval_runtime": 9.6532, |
|
"eval_samples_per_second": 11.395, |
|
"eval_steps_per_second": 1.45, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 14.1818, |
|
"eval_loss": 18.593591690063477, |
|
"eval_rouge1": 0.094, |
|
"eval_rouge2": 0.0324, |
|
"eval_rougeL": 0.0735, |
|
"eval_rougeLsum": 0.0732, |
|
"eval_runtime": 9.6893, |
|
"eval_samples_per_second": 11.353, |
|
"eval_steps_per_second": 1.445, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"eval_gen_len": 14.4364, |
|
"eval_loss": 18.286243438720703, |
|
"eval_rouge1": 0.0928, |
|
"eval_rouge2": 0.0329, |
|
"eval_rougeL": 0.0746, |
|
"eval_rougeLsum": 0.0749, |
|
"eval_runtime": 9.7163, |
|
"eval_samples_per_second": 11.321, |
|
"eval_steps_per_second": 1.441, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"eval_gen_len": 15.0727, |
|
"eval_loss": 17.896913528442383, |
|
"eval_rouge1": 0.096, |
|
"eval_rouge2": 0.0328, |
|
"eval_rougeL": 0.0788, |
|
"eval_rougeLsum": 0.0792, |
|
"eval_runtime": 9.6929, |
|
"eval_samples_per_second": 11.348, |
|
"eval_steps_per_second": 1.444, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_gen_len": 15.7364, |
|
"eval_loss": 17.442358016967773, |
|
"eval_rouge1": 0.1015, |
|
"eval_rouge2": 0.0334, |
|
"eval_rougeL": 0.0816, |
|
"eval_rougeLsum": 0.0818, |
|
"eval_runtime": 9.6767, |
|
"eval_samples_per_second": 11.368, |
|
"eval_steps_per_second": 1.447, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_gen_len": 16.2364, |
|
"eval_loss": 16.91552734375, |
|
"eval_rouge1": 0.1001, |
|
"eval_rouge2": 0.0337, |
|
"eval_rougeL": 0.0811, |
|
"eval_rougeLsum": 0.0814, |
|
"eval_runtime": 9.5645, |
|
"eval_samples_per_second": 11.501, |
|
"eval_steps_per_second": 1.464, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_gen_len": 16.6818, |
|
"eval_loss": 16.372838973999023, |
|
"eval_rouge1": 0.102, |
|
"eval_rouge2": 0.0336, |
|
"eval_rougeL": 0.081, |
|
"eval_rougeLsum": 0.0809, |
|
"eval_runtime": 9.7488, |
|
"eval_samples_per_second": 11.283, |
|
"eval_steps_per_second": 1.436, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 17.96, |
|
"eval_gen_len": 16.5364, |
|
"eval_loss": 15.727865219116211, |
|
"eval_rouge1": 0.0901, |
|
"eval_rouge2": 0.0301, |
|
"eval_rougeL": 0.0739, |
|
"eval_rougeLsum": 0.0741, |
|
"eval_runtime": 9.6417, |
|
"eval_samples_per_second": 11.409, |
|
"eval_steps_per_second": 1.452, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_gen_len": 16.9818, |
|
"eval_loss": 14.980103492736816, |
|
"eval_rouge1": 0.0828, |
|
"eval_rouge2": 0.0258, |
|
"eval_rougeL": 0.0673, |
|
"eval_rougeLsum": 0.0671, |
|
"eval_runtime": 9.6417, |
|
"eval_samples_per_second": 11.409, |
|
"eval_steps_per_second": 1.452, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_gen_len": 16.4727, |
|
"eval_loss": 14.11096477508545, |
|
"eval_rouge1": 0.0687, |
|
"eval_rouge2": 0.0146, |
|
"eval_rougeL": 0.0549, |
|
"eval_rougeLsum": 0.0547, |
|
"eval_runtime": 9.556, |
|
"eval_samples_per_second": 11.511, |
|
"eval_steps_per_second": 1.465, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 20.95, |
|
"eval_gen_len": 14.2364, |
|
"eval_loss": 13.337847709655762, |
|
"eval_rouge1": 0.051, |
|
"eval_rouge2": 0.0102, |
|
"eval_rougeL": 0.0418, |
|
"eval_rougeLsum": 0.0414, |
|
"eval_runtime": 9.7157, |
|
"eval_samples_per_second": 11.322, |
|
"eval_steps_per_second": 1.441, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"eval_gen_len": 12.3818, |
|
"eval_loss": 12.585112571716309, |
|
"eval_rouge1": 0.0377, |
|
"eval_rouge2": 0.007, |
|
"eval_rougeL": 0.0317, |
|
"eval_rougeLsum": 0.0317, |
|
"eval_runtime": 9.5457, |
|
"eval_samples_per_second": 11.524, |
|
"eval_steps_per_second": 1.467, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 22.98, |
|
"eval_gen_len": 12.0455, |
|
"eval_loss": 11.860977172851562, |
|
"eval_rouge1": 0.023, |
|
"eval_rouge2": 0.0048, |
|
"eval_rougeL": 0.0204, |
|
"eval_rougeLsum": 0.0204, |
|
"eval_runtime": 9.5524, |
|
"eval_samples_per_second": 11.515, |
|
"eval_steps_per_second": 1.466, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_gen_len": 10.8545, |
|
"eval_loss": 11.155168533325195, |
|
"eval_rouge1": 0.012, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0108, |
|
"eval_rougeLsum": 0.0108, |
|
"eval_runtime": 9.5834, |
|
"eval_samples_per_second": 11.478, |
|
"eval_steps_per_second": 1.461, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"eval_gen_len": 10.0273, |
|
"eval_loss": 10.512735366821289, |
|
"eval_rouge1": 0.0037, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0035, |
|
"eval_rougeLsum": 0.0036, |
|
"eval_runtime": 9.613, |
|
"eval_samples_per_second": 11.443, |
|
"eval_steps_per_second": 1.456, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 25.96, |
|
"eval_gen_len": 11.6, |
|
"eval_loss": 9.83348560333252, |
|
"eval_rouge1": 0.0039, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0038, |
|
"eval_rougeLsum": 0.0039, |
|
"eval_runtime": 9.5253, |
|
"eval_samples_per_second": 11.548, |
|
"eval_steps_per_second": 1.47, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 26.98, |
|
"eval_gen_len": 13.0455, |
|
"eval_loss": 9.216172218322754, |
|
"eval_rouge1": 0.0016, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0016, |
|
"eval_rougeLsum": 0.0016, |
|
"eval_runtime": 9.5441, |
|
"eval_samples_per_second": 11.525, |
|
"eval_steps_per_second": 1.467, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_gen_len": 14.6818, |
|
"eval_loss": 8.572382926940918, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.6018, |
|
"eval_samples_per_second": 11.456, |
|
"eval_steps_per_second": 1.458, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 28.95, |
|
"eval_gen_len": 15.2727, |
|
"eval_loss": 8.037731170654297, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0009, |
|
"eval_rougeLsum": 0.0009, |
|
"eval_runtime": 9.5646, |
|
"eval_samples_per_second": 11.501, |
|
"eval_steps_per_second": 1.464, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 29.96, |
|
"eval_gen_len": 16.3909, |
|
"eval_loss": 7.415070056915283, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.5548, |
|
"eval_samples_per_second": 11.513, |
|
"eval_steps_per_second": 1.465, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 30.98, |
|
"eval_gen_len": 17.8364, |
|
"eval_loss": 6.802865028381348, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5629, |
|
"eval_samples_per_second": 11.503, |
|
"eval_steps_per_second": 1.464, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_gen_len": 18.2818, |
|
"eval_loss": 6.211207866668701, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5163, |
|
"eval_samples_per_second": 11.559, |
|
"eval_steps_per_second": 1.471, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 32.95, |
|
"eval_gen_len": 18.7091, |
|
"eval_loss": 5.717328071594238, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.505, |
|
"eval_samples_per_second": 11.573, |
|
"eval_steps_per_second": 1.473, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"eval_gen_len": 18.7091, |
|
"eval_loss": 5.17288064956665, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.5443, |
|
"eval_samples_per_second": 11.525, |
|
"eval_steps_per_second": 1.467, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 34.98, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 4.669548034667969, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.4845, |
|
"eval_samples_per_second": 11.598, |
|
"eval_steps_per_second": 1.476, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 4.216309547424316, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.5515, |
|
"eval_samples_per_second": 11.516, |
|
"eval_steps_per_second": 1.466, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"grad_norm": 5.376431465148926, |
|
"learning_rate": 1.660854700854701e-05, |
|
"loss": 14.4939, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 36.95, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.8450570106506348, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5449, |
|
"eval_samples_per_second": 11.525, |
|
"eval_steps_per_second": 1.467, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 37.96, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.507812261581421, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.5902, |
|
"eval_samples_per_second": 11.47, |
|
"eval_steps_per_second": 1.46, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 38.98, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.231505870819092, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5074, |
|
"eval_samples_per_second": 11.57, |
|
"eval_steps_per_second": 1.473, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.9967288970947266, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5195, |
|
"eval_samples_per_second": 11.555, |
|
"eval_steps_per_second": 1.471, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 40.95, |
|
"eval_gen_len": 18.9545, |
|
"eval_loss": 2.8089849948883057, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5224, |
|
"eval_samples_per_second": 11.552, |
|
"eval_steps_per_second": 1.47, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"eval_gen_len": 18.3727, |
|
"eval_loss": 2.6384663581848145, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.5129, |
|
"eval_samples_per_second": 11.563, |
|
"eval_steps_per_second": 1.472, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 42.98, |
|
"eval_gen_len": 16.7273, |
|
"eval_loss": 2.492598295211792, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.5174, |
|
"eval_samples_per_second": 11.558, |
|
"eval_steps_per_second": 1.471, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_gen_len": 11.9636, |
|
"eval_loss": 2.3677501678466797, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.5796, |
|
"eval_samples_per_second": 11.483, |
|
"eval_steps_per_second": 1.461, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 44.95, |
|
"eval_gen_len": 9.2455, |
|
"eval_loss": 2.2777955532073975, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6685, |
|
"eval_samples_per_second": 11.377, |
|
"eval_steps_per_second": 1.448, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 45.96, |
|
"eval_gen_len": 7.9455, |
|
"eval_loss": 2.198147773742676, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.585, |
|
"eval_samples_per_second": 11.476, |
|
"eval_steps_per_second": 1.461, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 46.98, |
|
"eval_gen_len": 7.5909, |
|
"eval_loss": 2.1306562423706055, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6008, |
|
"eval_samples_per_second": 11.457, |
|
"eval_steps_per_second": 1.458, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_gen_len": 7.4091, |
|
"eval_loss": 2.0773117542266846, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.6506, |
|
"eval_samples_per_second": 11.398, |
|
"eval_steps_per_second": 1.451, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 48.95, |
|
"eval_gen_len": 7.2909, |
|
"eval_loss": 2.036808729171753, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5097, |
|
"eval_samples_per_second": 11.567, |
|
"eval_steps_per_second": 1.472, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 49.96, |
|
"eval_gen_len": 6.8364, |
|
"eval_loss": 1.9949748516082764, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6031, |
|
"eval_samples_per_second": 11.455, |
|
"eval_steps_per_second": 1.458, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 50.98, |
|
"eval_gen_len": 7.8273, |
|
"eval_loss": 1.957520842552185, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.5071, |
|
"eval_samples_per_second": 11.57, |
|
"eval_steps_per_second": 1.473, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_gen_len": 7.5545, |
|
"eval_loss": 1.9219788312911987, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5836, |
|
"eval_samples_per_second": 11.478, |
|
"eval_steps_per_second": 1.461, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 52.95, |
|
"eval_gen_len": 7.5364, |
|
"eval_loss": 1.8916202783584595, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6019, |
|
"eval_samples_per_second": 11.456, |
|
"eval_steps_per_second": 1.458, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 53.96, |
|
"eval_gen_len": 7.1182, |
|
"eval_loss": 1.8674402236938477, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5024, |
|
"eval_samples_per_second": 11.576, |
|
"eval_steps_per_second": 1.473, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 54.98, |
|
"eval_gen_len": 7.0364, |
|
"eval_loss": 1.846158742904663, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6139, |
|
"eval_samples_per_second": 11.442, |
|
"eval_steps_per_second": 1.456, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_gen_len": 7.0, |
|
"eval_loss": 1.827086329460144, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.591, |
|
"eval_samples_per_second": 11.469, |
|
"eval_steps_per_second": 1.46, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 56.95, |
|
"eval_gen_len": 7.5455, |
|
"eval_loss": 1.8088210821151733, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.572, |
|
"eval_samples_per_second": 11.492, |
|
"eval_steps_per_second": 1.463, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 57.96, |
|
"eval_gen_len": 7.9, |
|
"eval_loss": 1.789602279663086, |
|
"eval_rouge1": 0.0001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0001, |
|
"eval_rougeLsum": 0.0001, |
|
"eval_runtime": 9.5837, |
|
"eval_samples_per_second": 11.478, |
|
"eval_steps_per_second": 1.461, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 58.98, |
|
"eval_gen_len": 8.2545, |
|
"eval_loss": 1.7687013149261475, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.6152, |
|
"eval_samples_per_second": 11.44, |
|
"eval_steps_per_second": 1.456, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_gen_len": 8.3636, |
|
"eval_loss": 1.7496564388275146, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4902, |
|
"eval_samples_per_second": 11.591, |
|
"eval_steps_per_second": 1.475, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 60.95, |
|
"eval_gen_len": 9.1455, |
|
"eval_loss": 1.7332907915115356, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.5867, |
|
"eval_samples_per_second": 11.474, |
|
"eval_steps_per_second": 1.46, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 61.96, |
|
"eval_gen_len": 8.9, |
|
"eval_loss": 1.7185932397842407, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4797, |
|
"eval_samples_per_second": 11.604, |
|
"eval_steps_per_second": 1.477, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 62.98, |
|
"eval_gen_len": 9.7545, |
|
"eval_loss": 1.7047526836395264, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.618, |
|
"eval_samples_per_second": 11.437, |
|
"eval_steps_per_second": 1.456, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_gen_len": 9.9818, |
|
"eval_loss": 1.6921414136886597, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6264, |
|
"eval_samples_per_second": 11.427, |
|
"eval_steps_per_second": 1.454, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 64.95, |
|
"eval_gen_len": 9.6909, |
|
"eval_loss": 1.6816327571868896, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5134, |
|
"eval_samples_per_second": 11.563, |
|
"eval_steps_per_second": 1.472, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 65.96, |
|
"eval_gen_len": 8.9545, |
|
"eval_loss": 1.6697723865509033, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5506, |
|
"eval_samples_per_second": 11.518, |
|
"eval_steps_per_second": 1.466, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"eval_gen_len": 9.6818, |
|
"eval_loss": 1.6568113565444946, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5388, |
|
"eval_samples_per_second": 11.532, |
|
"eval_steps_per_second": 1.468, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_gen_len": 9.9455, |
|
"eval_loss": 1.6469463109970093, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4825, |
|
"eval_samples_per_second": 11.6, |
|
"eval_steps_per_second": 1.476, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 68.95, |
|
"eval_gen_len": 9.3545, |
|
"eval_loss": 1.6408612728118896, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.5212, |
|
"eval_samples_per_second": 11.553, |
|
"eval_steps_per_second": 1.47, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 69.96, |
|
"eval_gen_len": 9.1545, |
|
"eval_loss": 1.6316603422164917, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4986, |
|
"eval_samples_per_second": 11.581, |
|
"eval_steps_per_second": 1.474, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 70.98, |
|
"eval_gen_len": 9.7818, |
|
"eval_loss": 1.623169183731079, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5276, |
|
"eval_samples_per_second": 11.545, |
|
"eval_steps_per_second": 1.469, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_gen_len": 10.0273, |
|
"eval_loss": 1.6152759790420532, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5801, |
|
"eval_samples_per_second": 11.482, |
|
"eval_steps_per_second": 1.461, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"grad_norm": 3.344996213912964, |
|
"learning_rate": 1.3196581196581197e-05, |
|
"loss": 2.6089, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 72.95, |
|
"eval_gen_len": 9.1727, |
|
"eval_loss": 1.6071548461914062, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.6124, |
|
"eval_samples_per_second": 11.444, |
|
"eval_steps_per_second": 1.456, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 73.96, |
|
"eval_gen_len": 9.1545, |
|
"eval_loss": 1.599768042564392, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.498, |
|
"eval_samples_per_second": 11.581, |
|
"eval_steps_per_second": 1.474, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 74.98, |
|
"eval_gen_len": 9.5273, |
|
"eval_loss": 1.5934444665908813, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5664, |
|
"eval_samples_per_second": 11.499, |
|
"eval_steps_per_second": 1.463, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_gen_len": 10.4091, |
|
"eval_loss": 1.5867650508880615, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.579, |
|
"eval_samples_per_second": 11.483, |
|
"eval_steps_per_second": 1.462, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 76.95, |
|
"eval_gen_len": 10.3, |
|
"eval_loss": 1.5827070474624634, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.4875, |
|
"eval_samples_per_second": 11.594, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 77.96, |
|
"eval_gen_len": 9.6182, |
|
"eval_loss": 1.5748482942581177, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.4782, |
|
"eval_samples_per_second": 11.606, |
|
"eval_steps_per_second": 1.477, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 78.98, |
|
"eval_gen_len": 9.6273, |
|
"eval_loss": 1.5662298202514648, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.4778, |
|
"eval_samples_per_second": 11.606, |
|
"eval_steps_per_second": 1.477, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_gen_len": 9.5273, |
|
"eval_loss": 1.5585095882415771, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.4851, |
|
"eval_samples_per_second": 11.597, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 80.95, |
|
"eval_gen_len": 10.1364, |
|
"eval_loss": 1.5539778470993042, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6048, |
|
"eval_samples_per_second": 11.453, |
|
"eval_steps_per_second": 1.458, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 81.96, |
|
"eval_gen_len": 9.6182, |
|
"eval_loss": 1.541092038154602, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4829, |
|
"eval_samples_per_second": 11.6, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 82.98, |
|
"eval_gen_len": 9.6091, |
|
"eval_loss": 1.5330486297607422, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5558, |
|
"eval_samples_per_second": 11.511, |
|
"eval_steps_per_second": 1.465, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_gen_len": 9.0818, |
|
"eval_loss": 1.5241070985794067, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6145, |
|
"eval_samples_per_second": 11.441, |
|
"eval_steps_per_second": 1.456, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 84.95, |
|
"eval_gen_len": 9.3, |
|
"eval_loss": 1.5192241668701172, |
|
"eval_rouge1": 0.0004, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0004, |
|
"eval_rougeLsum": 0.0004, |
|
"eval_runtime": 9.4781, |
|
"eval_samples_per_second": 11.606, |
|
"eval_steps_per_second": 1.477, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 85.96, |
|
"eval_gen_len": 9.5364, |
|
"eval_loss": 1.5117179155349731, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5291, |
|
"eval_samples_per_second": 11.544, |
|
"eval_steps_per_second": 1.469, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 86.98, |
|
"eval_gen_len": 9.4545, |
|
"eval_loss": 1.4990766048431396, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.6213, |
|
"eval_samples_per_second": 11.433, |
|
"eval_steps_per_second": 1.455, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_gen_len": 9.4182, |
|
"eval_loss": 1.4878661632537842, |
|
"eval_rouge1": 0.0011, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.4886, |
|
"eval_samples_per_second": 11.593, |
|
"eval_steps_per_second": 1.475, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 88.95, |
|
"eval_gen_len": 9.5727, |
|
"eval_loss": 1.479432463645935, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.5867, |
|
"eval_samples_per_second": 11.474, |
|
"eval_steps_per_second": 1.46, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 89.96, |
|
"eval_gen_len": 9.3909, |
|
"eval_loss": 1.471197247505188, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.6139, |
|
"eval_samples_per_second": 11.442, |
|
"eval_steps_per_second": 1.456, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 90.98, |
|
"eval_gen_len": 9.5, |
|
"eval_loss": 1.4649511575698853, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.5947, |
|
"eval_samples_per_second": 11.465, |
|
"eval_steps_per_second": 1.459, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_gen_len": 9.5273, |
|
"eval_loss": 1.4548052549362183, |
|
"eval_rouge1": 0.0016, |
|
"eval_rouge2": 0.0004, |
|
"eval_rougeL": 0.0016, |
|
"eval_rougeLsum": 0.0017, |
|
"eval_runtime": 9.6868, |
|
"eval_samples_per_second": 11.356, |
|
"eval_steps_per_second": 1.445, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 92.95, |
|
"eval_gen_len": 9.3727, |
|
"eval_loss": 1.444838285446167, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5071, |
|
"eval_samples_per_second": 11.57, |
|
"eval_steps_per_second": 1.473, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 93.96, |
|
"eval_gen_len": 9.6, |
|
"eval_loss": 1.4365838766098022, |
|
"eval_rouge1": 0.0014, |
|
"eval_rouge2": 0.0004, |
|
"eval_rougeL": 0.0014, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 9.5507, |
|
"eval_samples_per_second": 11.517, |
|
"eval_steps_per_second": 1.466, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 94.98, |
|
"eval_gen_len": 9.3364, |
|
"eval_loss": 1.4285393953323364, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.6323, |
|
"eval_samples_per_second": 11.42, |
|
"eval_steps_per_second": 1.453, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_gen_len": 9.3455, |
|
"eval_loss": 1.4242411851882935, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.5163, |
|
"eval_samples_per_second": 11.559, |
|
"eval_steps_per_second": 1.471, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 96.95, |
|
"eval_gen_len": 9.4, |
|
"eval_loss": 1.4160754680633545, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.5868, |
|
"eval_samples_per_second": 11.474, |
|
"eval_steps_per_second": 1.46, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 97.96, |
|
"eval_gen_len": 9.4455, |
|
"eval_loss": 1.4052343368530273, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.6109, |
|
"eval_samples_per_second": 11.445, |
|
"eval_steps_per_second": 1.457, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 98.98, |
|
"eval_gen_len": 9.5273, |
|
"eval_loss": 1.3928413391113281, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.6591, |
|
"eval_samples_per_second": 11.388, |
|
"eval_steps_per_second": 1.449, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_gen_len": 9.5182, |
|
"eval_loss": 1.3840864896774292, |
|
"eval_rouge1": 0.0011, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0011, |
|
"eval_rougeLsum": 0.0011, |
|
"eval_runtime": 9.6661, |
|
"eval_samples_per_second": 11.38, |
|
"eval_steps_per_second": 1.448, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 100.95, |
|
"eval_gen_len": 9.3, |
|
"eval_loss": 1.381872296333313, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0001, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.6513, |
|
"eval_samples_per_second": 11.397, |
|
"eval_steps_per_second": 1.451, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 101.96, |
|
"eval_gen_len": 9.3455, |
|
"eval_loss": 1.37588632106781, |
|
"eval_rouge1": 0.0004, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0004, |
|
"eval_rougeLsum": 0.0004, |
|
"eval_runtime": 9.5607, |
|
"eval_samples_per_second": 11.505, |
|
"eval_steps_per_second": 1.464, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 102.98, |
|
"eval_gen_len": 9.2636, |
|
"eval_loss": 1.3675447702407837, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5449, |
|
"eval_samples_per_second": 11.524, |
|
"eval_steps_per_second": 1.467, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_gen_len": 9.2455, |
|
"eval_loss": 1.358955979347229, |
|
"eval_rouge1": 0.0012, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0012, |
|
"eval_rougeLsum": 0.0012, |
|
"eval_runtime": 9.5868, |
|
"eval_samples_per_second": 11.474, |
|
"eval_steps_per_second": 1.46, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 104.95, |
|
"eval_gen_len": 9.1455, |
|
"eval_loss": 1.3501193523406982, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.6684, |
|
"eval_samples_per_second": 11.377, |
|
"eval_steps_per_second": 1.448, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 105.96, |
|
"eval_gen_len": 9.1636, |
|
"eval_loss": 1.344258189201355, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.5388, |
|
"eval_samples_per_second": 11.532, |
|
"eval_steps_per_second": 1.468, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 106.98, |
|
"eval_gen_len": 9.8, |
|
"eval_loss": 1.3355817794799805, |
|
"eval_rouge1": 0.001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.5263, |
|
"eval_samples_per_second": 11.547, |
|
"eval_steps_per_second": 1.47, |
|
"step": 1471 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_gen_len": 9.9182, |
|
"eval_loss": 1.3305474519729614, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.6315, |
|
"eval_samples_per_second": 11.421, |
|
"eval_steps_per_second": 1.454, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 108.95, |
|
"eval_gen_len": 10.1636, |
|
"eval_loss": 1.3212946653366089, |
|
"eval_rouge1": 0.0028, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0025, |
|
"eval_rougeLsum": 0.0026, |
|
"eval_runtime": 9.5797, |
|
"eval_samples_per_second": 11.483, |
|
"eval_steps_per_second": 1.461, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 109.09, |
|
"grad_norm": 5.480063438415527, |
|
"learning_rate": 9.784615384615387e-06, |
|
"loss": 1.7753, |
|
"step": 1500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2925, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 225, |
|
"save_steps": 500, |
|
"total_flos": 5.805822083648717e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|