|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 438.35616438356163, |
|
"eval_steps": 500, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"eval_gen_len": 14.6545, |
|
"eval_loss": 21.487245559692383, |
|
"eval_rouge1": 0.0812, |
|
"eval_rouge2": 0.0183, |
|
"eval_rougeL": 0.0668, |
|
"eval_rougeLsum": 0.0663, |
|
"eval_runtime": 5.8972, |
|
"eval_samples_per_second": 18.653, |
|
"eval_steps_per_second": 3.222, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_gen_len": 14.5091, |
|
"eval_loss": 21.332395553588867, |
|
"eval_rouge1": 0.0808, |
|
"eval_rouge2": 0.0191, |
|
"eval_rougeL": 0.0657, |
|
"eval_rougeLsum": 0.0652, |
|
"eval_runtime": 5.8919, |
|
"eval_samples_per_second": 18.67, |
|
"eval_steps_per_second": 3.225, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_gen_len": 14.0727, |
|
"eval_loss": 21.108753204345703, |
|
"eval_rouge1": 0.0819, |
|
"eval_rouge2": 0.0203, |
|
"eval_rougeL": 0.0671, |
|
"eval_rougeLsum": 0.0665, |
|
"eval_runtime": 5.8908, |
|
"eval_samples_per_second": 18.673, |
|
"eval_steps_per_second": 3.225, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 14.0727, |
|
"eval_loss": 20.799583435058594, |
|
"eval_rouge1": 0.0824, |
|
"eval_rouge2": 0.0206, |
|
"eval_rougeL": 0.0665, |
|
"eval_rougeLsum": 0.0659, |
|
"eval_runtime": 5.8851, |
|
"eval_samples_per_second": 18.691, |
|
"eval_steps_per_second": 3.228, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_gen_len": 14.2182, |
|
"eval_loss": 20.4609317779541, |
|
"eval_rouge1": 0.0814, |
|
"eval_rouge2": 0.0212, |
|
"eval_rougeL": 0.0669, |
|
"eval_rougeLsum": 0.0664, |
|
"eval_runtime": 5.8894, |
|
"eval_samples_per_second": 18.677, |
|
"eval_steps_per_second": 3.226, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_gen_len": 14.2182, |
|
"eval_loss": 20.145660400390625, |
|
"eval_rouge1": 0.0847, |
|
"eval_rouge2": 0.0208, |
|
"eval_rougeL": 0.067, |
|
"eval_rougeLsum": 0.0666, |
|
"eval_runtime": 5.8963, |
|
"eval_samples_per_second": 18.656, |
|
"eval_steps_per_second": 3.222, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_gen_len": 14.0727, |
|
"eval_loss": 19.89990234375, |
|
"eval_rouge1": 0.084, |
|
"eval_rouge2": 0.0184, |
|
"eval_rougeL": 0.0662, |
|
"eval_rougeLsum": 0.0657, |
|
"eval_runtime": 5.8939, |
|
"eval_samples_per_second": 18.663, |
|
"eval_steps_per_second": 3.224, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 14.1455, |
|
"eval_loss": 19.661205291748047, |
|
"eval_rouge1": 0.0882, |
|
"eval_rouge2": 0.0219, |
|
"eval_rougeL": 0.0703, |
|
"eval_rougeLsum": 0.0699, |
|
"eval_runtime": 5.9009, |
|
"eval_samples_per_second": 18.641, |
|
"eval_steps_per_second": 3.22, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_gen_len": 14.9273, |
|
"eval_loss": 19.420059204101562, |
|
"eval_rouge1": 0.0958, |
|
"eval_rouge2": 0.0243, |
|
"eval_rougeL": 0.0775, |
|
"eval_rougeLsum": 0.0772, |
|
"eval_runtime": 5.8976, |
|
"eval_samples_per_second": 18.652, |
|
"eval_steps_per_second": 3.222, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_gen_len": 14.9273, |
|
"eval_loss": 19.1621036529541, |
|
"eval_rouge1": 0.0915, |
|
"eval_rouge2": 0.0256, |
|
"eval_rougeL": 0.0769, |
|
"eval_rougeLsum": 0.0763, |
|
"eval_runtime": 5.937, |
|
"eval_samples_per_second": 18.528, |
|
"eval_steps_per_second": 3.2, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_gen_len": 14.7818, |
|
"eval_loss": 18.873458862304688, |
|
"eval_rouge1": 0.0968, |
|
"eval_rouge2": 0.0284, |
|
"eval_rougeL": 0.0786, |
|
"eval_rougeLsum": 0.0786, |
|
"eval_runtime": 5.8869, |
|
"eval_samples_per_second": 18.686, |
|
"eval_steps_per_second": 3.228, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 14.6364, |
|
"eval_loss": 18.52387237548828, |
|
"eval_rouge1": 0.0901, |
|
"eval_rouge2": 0.0262, |
|
"eval_rougeL": 0.0738, |
|
"eval_rougeLsum": 0.0741, |
|
"eval_runtime": 5.9384, |
|
"eval_samples_per_second": 18.524, |
|
"eval_steps_per_second": 3.2, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_gen_len": 14.4909, |
|
"eval_loss": 18.13555145263672, |
|
"eval_rouge1": 0.0886, |
|
"eval_rouge2": 0.0263, |
|
"eval_rougeL": 0.0714, |
|
"eval_rougeLsum": 0.0717, |
|
"eval_runtime": 5.9432, |
|
"eval_samples_per_second": 18.509, |
|
"eval_steps_per_second": 3.197, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"eval_gen_len": 14.4909, |
|
"eval_loss": 17.66015625, |
|
"eval_rouge1": 0.0915, |
|
"eval_rouge2": 0.03, |
|
"eval_rougeL": 0.0712, |
|
"eval_rougeLsum": 0.0713, |
|
"eval_runtime": 5.9005, |
|
"eval_samples_per_second": 18.643, |
|
"eval_steps_per_second": 3.22, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"eval_gen_len": 15.2182, |
|
"eval_loss": 17.107126235961914, |
|
"eval_rouge1": 0.0933, |
|
"eval_rouge2": 0.0295, |
|
"eval_rougeL": 0.0733, |
|
"eval_rougeLsum": 0.0734, |
|
"eval_runtime": 5.9126, |
|
"eval_samples_per_second": 18.604, |
|
"eval_steps_per_second": 3.213, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_gen_len": 15.2182, |
|
"eval_loss": 16.4465389251709, |
|
"eval_rouge1": 0.0792, |
|
"eval_rouge2": 0.02, |
|
"eval_rougeL": 0.0637, |
|
"eval_rougeLsum": 0.0639, |
|
"eval_runtime": 5.893, |
|
"eval_samples_per_second": 18.666, |
|
"eval_steps_per_second": 3.224, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_gen_len": 14.7818, |
|
"eval_loss": 15.7671480178833, |
|
"eval_rouge1": 0.069, |
|
"eval_rouge2": 0.0176, |
|
"eval_rougeL": 0.0534, |
|
"eval_rougeLsum": 0.0537, |
|
"eval_runtime": 5.8934, |
|
"eval_samples_per_second": 18.665, |
|
"eval_steps_per_second": 3.224, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 17.97, |
|
"eval_gen_len": 13.1545, |
|
"eval_loss": 14.998970031738281, |
|
"eval_rouge1": 0.0566, |
|
"eval_rouge2": 0.0124, |
|
"eval_rougeL": 0.0449, |
|
"eval_rougeLsum": 0.0451, |
|
"eval_runtime": 5.8867, |
|
"eval_samples_per_second": 18.686, |
|
"eval_steps_per_second": 3.228, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"eval_gen_len": 11.1091, |
|
"eval_loss": 14.060928344726562, |
|
"eval_rouge1": 0.0498, |
|
"eval_rouge2": 0.0171, |
|
"eval_rougeL": 0.0368, |
|
"eval_rougeLsum": 0.0367, |
|
"eval_runtime": 5.6651, |
|
"eval_samples_per_second": 19.417, |
|
"eval_steps_per_second": 3.354, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_gen_len": 8.7273, |
|
"eval_loss": 13.174235343933105, |
|
"eval_rouge1": 0.0205, |
|
"eval_rouge2": 0.0044, |
|
"eval_rougeL": 0.0165, |
|
"eval_rougeLsum": 0.0164, |
|
"eval_runtime": 5.7609, |
|
"eval_samples_per_second": 19.094, |
|
"eval_steps_per_second": 3.298, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_gen_len": 6.8636, |
|
"eval_loss": 12.371671676635742, |
|
"eval_rouge1": 0.0139, |
|
"eval_rouge2": 0.0028, |
|
"eval_rougeL": 0.0124, |
|
"eval_rougeLsum": 0.0123, |
|
"eval_runtime": 5.2236, |
|
"eval_samples_per_second": 21.058, |
|
"eval_steps_per_second": 3.637, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"eval_gen_len": 6.8727, |
|
"eval_loss": 11.577987670898438, |
|
"eval_rouge1": 0.0053, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0047, |
|
"eval_rougeLsum": 0.0048, |
|
"eval_runtime": 5.7472, |
|
"eval_samples_per_second": 19.14, |
|
"eval_steps_per_second": 3.306, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 22.96, |
|
"eval_gen_len": 7.3818, |
|
"eval_loss": 10.78397274017334, |
|
"eval_rouge1": 0.0021, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0021, |
|
"eval_rougeLsum": 0.0021, |
|
"eval_runtime": 5.7621, |
|
"eval_samples_per_second": 19.09, |
|
"eval_steps_per_second": 3.297, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_gen_len": 6.8273, |
|
"eval_loss": 9.95447826385498, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 5.4394, |
|
"eval_samples_per_second": 20.223, |
|
"eval_steps_per_second": 3.493, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_gen_len": 7.0909, |
|
"eval_loss": 9.179459571838379, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.4343, |
|
"eval_samples_per_second": 20.242, |
|
"eval_steps_per_second": 3.496, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"eval_gen_len": 8.1, |
|
"eval_loss": 8.421984672546387, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 5.7592, |
|
"eval_samples_per_second": 19.1, |
|
"eval_steps_per_second": 3.299, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 26.96, |
|
"eval_gen_len": 8.2636, |
|
"eval_loss": 7.694218158721924, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8511, |
|
"eval_samples_per_second": 18.8, |
|
"eval_steps_per_second": 3.247, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"grad_norm": 5.483399868011475, |
|
"learning_rate": 1.8893333333333334e-05, |
|
"loss": 16.3522, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_gen_len": 11.3818, |
|
"eval_loss": 6.939992427825928, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.887, |
|
"eval_samples_per_second": 18.685, |
|
"eval_steps_per_second": 3.227, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_gen_len": 12.6273, |
|
"eval_loss": 6.2829270362854, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.905, |
|
"eval_samples_per_second": 18.628, |
|
"eval_steps_per_second": 3.218, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 29.97, |
|
"eval_gen_len": 15.3091, |
|
"eval_loss": 5.604813575744629, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 5.8732, |
|
"eval_samples_per_second": 18.729, |
|
"eval_steps_per_second": 3.235, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 30.96, |
|
"eval_gen_len": 17.3182, |
|
"eval_loss": 4.977880954742432, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9021, |
|
"eval_samples_per_second": 18.637, |
|
"eval_steps_per_second": 3.219, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_gen_len": 18.3273, |
|
"eval_loss": 4.37266731262207, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 5.9191, |
|
"eval_samples_per_second": 18.584, |
|
"eval_steps_per_second": 3.21, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_gen_len": 18.7091, |
|
"eval_loss": 3.8430399894714355, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 5.8717, |
|
"eval_samples_per_second": 18.734, |
|
"eval_steps_per_second": 3.236, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"eval_gen_len": 18.8545, |
|
"eval_loss": 3.400059938430786, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 5.9022, |
|
"eval_samples_per_second": 18.637, |
|
"eval_steps_per_second": 3.219, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 34.96, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.0639424324035645, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 5.8543, |
|
"eval_samples_per_second": 18.79, |
|
"eval_steps_per_second": 3.245, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.758321762084961, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 5.9112, |
|
"eval_samples_per_second": 18.609, |
|
"eval_steps_per_second": 3.214, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_gen_len": 18.4364, |
|
"eval_loss": 2.5352485179901123, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 5.8968, |
|
"eval_samples_per_second": 18.654, |
|
"eval_steps_per_second": 3.222, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 37.97, |
|
"eval_gen_len": 14.8909, |
|
"eval_loss": 2.3651320934295654, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 5.8625, |
|
"eval_samples_per_second": 18.763, |
|
"eval_steps_per_second": 3.241, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 38.96, |
|
"eval_gen_len": 10.8273, |
|
"eval_loss": 2.230059862136841, |
|
"eval_rouge1": 0.0014, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0014, |
|
"eval_rougeLsum": 0.0014, |
|
"eval_runtime": 5.852, |
|
"eval_samples_per_second": 18.797, |
|
"eval_steps_per_second": 3.247, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_gen_len": 7.9545, |
|
"eval_loss": 2.111604928970337, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8625, |
|
"eval_samples_per_second": 18.763, |
|
"eval_steps_per_second": 3.241, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"eval_gen_len": 6.8364, |
|
"eval_loss": 2.019117832183838, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8494, |
|
"eval_samples_per_second": 18.805, |
|
"eval_steps_per_second": 3.248, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 41.97, |
|
"eval_gen_len": 6.1727, |
|
"eval_loss": 1.950454831123352, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8618, |
|
"eval_samples_per_second": 18.765, |
|
"eval_steps_per_second": 3.241, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 42.96, |
|
"eval_gen_len": 5.6, |
|
"eval_loss": 1.9009199142456055, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8848, |
|
"eval_samples_per_second": 18.692, |
|
"eval_steps_per_second": 3.229, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_gen_len": 5.4455, |
|
"eval_loss": 1.8568826913833618, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8517, |
|
"eval_samples_per_second": 18.798, |
|
"eval_steps_per_second": 3.247, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"eval_gen_len": 5.1909, |
|
"eval_loss": 1.8185267448425293, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8682, |
|
"eval_samples_per_second": 18.745, |
|
"eval_steps_per_second": 3.238, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 45.97, |
|
"eval_gen_len": 5.1182, |
|
"eval_loss": 1.7847113609313965, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8488, |
|
"eval_samples_per_second": 18.807, |
|
"eval_steps_per_second": 3.249, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 46.96, |
|
"eval_gen_len": 4.9455, |
|
"eval_loss": 1.7547551393508911, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8518, |
|
"eval_samples_per_second": 18.798, |
|
"eval_steps_per_second": 3.247, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_gen_len": 4.9455, |
|
"eval_loss": 1.7266695499420166, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8585, |
|
"eval_samples_per_second": 18.776, |
|
"eval_steps_per_second": 3.243, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 48.99, |
|
"eval_gen_len": 4.7455, |
|
"eval_loss": 1.7055079936981201, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8511, |
|
"eval_samples_per_second": 18.8, |
|
"eval_steps_per_second": 3.247, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 49.97, |
|
"eval_gen_len": 4.4727, |
|
"eval_loss": 1.6864752769470215, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.848, |
|
"eval_samples_per_second": 18.81, |
|
"eval_steps_per_second": 3.249, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 50.96, |
|
"eval_gen_len": 4.4, |
|
"eval_loss": 1.6679636240005493, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8942, |
|
"eval_samples_per_second": 18.662, |
|
"eval_steps_per_second": 3.223, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_gen_len": 4.8273, |
|
"eval_loss": 1.6500035524368286, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8572, |
|
"eval_samples_per_second": 18.78, |
|
"eval_steps_per_second": 3.244, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 52.99, |
|
"eval_gen_len": 5.4273, |
|
"eval_loss": 1.6347858905792236, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8874, |
|
"eval_samples_per_second": 18.684, |
|
"eval_steps_per_second": 3.227, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 53.97, |
|
"eval_gen_len": 5.4727, |
|
"eval_loss": 1.620485782623291, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8561, |
|
"eval_samples_per_second": 18.784, |
|
"eval_steps_per_second": 3.244, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 54.79, |
|
"grad_norm": 3.8756470680236816, |
|
"learning_rate": 1.7786666666666667e-05, |
|
"loss": 3.4786, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 54.96, |
|
"eval_gen_len": 4.9909, |
|
"eval_loss": 1.6101189851760864, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9013, |
|
"eval_samples_per_second": 18.64, |
|
"eval_steps_per_second": 3.22, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_gen_len": 4.9727, |
|
"eval_loss": 1.5964934825897217, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8515, |
|
"eval_samples_per_second": 18.799, |
|
"eval_steps_per_second": 3.247, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 56.99, |
|
"eval_gen_len": 5.3, |
|
"eval_loss": 1.583103060722351, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8488, |
|
"eval_samples_per_second": 18.807, |
|
"eval_steps_per_second": 3.249, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 57.97, |
|
"eval_gen_len": 5.2273, |
|
"eval_loss": 1.5738121271133423, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8912, |
|
"eval_samples_per_second": 18.672, |
|
"eval_steps_per_second": 3.225, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 58.96, |
|
"eval_gen_len": 5.5273, |
|
"eval_loss": 1.5626448392868042, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8527, |
|
"eval_samples_per_second": 18.795, |
|
"eval_steps_per_second": 3.246, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_gen_len": 6.3273, |
|
"eval_loss": 1.5520726442337036, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8887, |
|
"eval_samples_per_second": 18.68, |
|
"eval_steps_per_second": 3.227, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 60.99, |
|
"eval_gen_len": 7.1091, |
|
"eval_loss": 1.5398296117782593, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8838, |
|
"eval_samples_per_second": 18.695, |
|
"eval_steps_per_second": 3.229, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 61.97, |
|
"eval_gen_len": 7.9182, |
|
"eval_loss": 1.5261036157608032, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8837, |
|
"eval_samples_per_second": 18.696, |
|
"eval_steps_per_second": 3.229, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 62.96, |
|
"eval_gen_len": 8.6, |
|
"eval_loss": 1.5135173797607422, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8878, |
|
"eval_samples_per_second": 18.683, |
|
"eval_steps_per_second": 3.227, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_gen_len": 8.9727, |
|
"eval_loss": 1.5019876956939697, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8993, |
|
"eval_samples_per_second": 18.646, |
|
"eval_steps_per_second": 3.221, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 64.99, |
|
"eval_gen_len": 9.1455, |
|
"eval_loss": 1.4927572011947632, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8898, |
|
"eval_samples_per_second": 18.676, |
|
"eval_steps_per_second": 3.226, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 65.97, |
|
"eval_gen_len": 9.3636, |
|
"eval_loss": 1.4839699268341064, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8485, |
|
"eval_samples_per_second": 18.808, |
|
"eval_steps_per_second": 3.249, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 66.96, |
|
"eval_gen_len": 9.6727, |
|
"eval_loss": 1.4724147319793701, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8498, |
|
"eval_samples_per_second": 18.804, |
|
"eval_steps_per_second": 3.248, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_gen_len": 9.6545, |
|
"eval_loss": 1.4610724449157715, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.857, |
|
"eval_samples_per_second": 18.781, |
|
"eval_steps_per_second": 3.244, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 68.99, |
|
"eval_gen_len": 9.7182, |
|
"eval_loss": 1.4491708278656006, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8545, |
|
"eval_samples_per_second": 18.789, |
|
"eval_steps_per_second": 3.245, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 69.97, |
|
"eval_gen_len": 9.6727, |
|
"eval_loss": 1.4401447772979736, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8617, |
|
"eval_samples_per_second": 18.766, |
|
"eval_steps_per_second": 3.241, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 70.96, |
|
"eval_gen_len": 9.6818, |
|
"eval_loss": 1.4306913614273071, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.861, |
|
"eval_samples_per_second": 18.768, |
|
"eval_steps_per_second": 3.242, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_gen_len": 9.7636, |
|
"eval_loss": 1.4177192449569702, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8583, |
|
"eval_samples_per_second": 18.777, |
|
"eval_steps_per_second": 3.243, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"eval_gen_len": 9.8182, |
|
"eval_loss": 1.4081608057022095, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8574, |
|
"eval_samples_per_second": 18.78, |
|
"eval_steps_per_second": 3.244, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 73.97, |
|
"eval_gen_len": 9.8, |
|
"eval_loss": 1.3982936143875122, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8596, |
|
"eval_samples_per_second": 18.773, |
|
"eval_steps_per_second": 3.243, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 74.96, |
|
"eval_gen_len": 9.7545, |
|
"eval_loss": 1.385299563407898, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8543, |
|
"eval_samples_per_second": 18.79, |
|
"eval_steps_per_second": 3.245, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_gen_len": 9.8727, |
|
"eval_loss": 1.3723993301391602, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9071, |
|
"eval_samples_per_second": 18.622, |
|
"eval_steps_per_second": 3.216, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 76.99, |
|
"eval_gen_len": 9.8636, |
|
"eval_loss": 1.3635698556900024, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8955, |
|
"eval_samples_per_second": 18.658, |
|
"eval_steps_per_second": 3.223, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 77.97, |
|
"eval_gen_len": 9.7727, |
|
"eval_loss": 1.3577702045440674, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8571, |
|
"eval_samples_per_second": 18.781, |
|
"eval_steps_per_second": 3.244, |
|
"step": 1423 |
|
}, |
|
{ |
|
"epoch": 78.96, |
|
"eval_gen_len": 9.8455, |
|
"eval_loss": 1.350039005279541, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8893, |
|
"eval_samples_per_second": 18.678, |
|
"eval_steps_per_second": 3.226, |
|
"step": 1441 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_gen_len": 9.8, |
|
"eval_loss": 1.3370468616485596, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9191, |
|
"eval_samples_per_second": 18.584, |
|
"eval_steps_per_second": 3.21, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 80.99, |
|
"eval_gen_len": 9.7909, |
|
"eval_loss": 1.3282612562179565, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8938, |
|
"eval_samples_per_second": 18.664, |
|
"eval_steps_per_second": 3.224, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 81.97, |
|
"eval_gen_len": 9.7273, |
|
"eval_loss": 1.3168359994888306, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8597, |
|
"eval_samples_per_second": 18.772, |
|
"eval_steps_per_second": 3.242, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 82.19, |
|
"grad_norm": 2.8337929248809814, |
|
"learning_rate": 1.667777777777778e-05, |
|
"loss": 1.7958, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 82.96, |
|
"eval_gen_len": 9.8727, |
|
"eval_loss": 1.3036466836929321, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8558, |
|
"eval_samples_per_second": 18.785, |
|
"eval_steps_per_second": 3.245, |
|
"step": 1514 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_gen_len": 9.9455, |
|
"eval_loss": 1.2935236692428589, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8683, |
|
"eval_samples_per_second": 18.745, |
|
"eval_steps_per_second": 3.238, |
|
"step": 1533 |
|
}, |
|
{ |
|
"epoch": 84.99, |
|
"eval_gen_len": 9.9182, |
|
"eval_loss": 1.2810677289962769, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8603, |
|
"eval_samples_per_second": 18.77, |
|
"eval_steps_per_second": 3.242, |
|
"step": 1551 |
|
}, |
|
{ |
|
"epoch": 85.97, |
|
"eval_gen_len": 9.9364, |
|
"eval_loss": 1.2679041624069214, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8561, |
|
"eval_samples_per_second": 18.784, |
|
"eval_steps_per_second": 3.244, |
|
"step": 1569 |
|
}, |
|
{ |
|
"epoch": 86.96, |
|
"eval_gen_len": 9.9091, |
|
"eval_loss": 1.259030818939209, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.855, |
|
"eval_samples_per_second": 18.787, |
|
"eval_steps_per_second": 3.245, |
|
"step": 1587 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_gen_len": 9.9636, |
|
"eval_loss": 1.2463934421539307, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8568, |
|
"eval_samples_per_second": 18.782, |
|
"eval_steps_per_second": 3.244, |
|
"step": 1606 |
|
}, |
|
{ |
|
"epoch": 88.99, |
|
"eval_gen_len": 9.9091, |
|
"eval_loss": 1.2380764484405518, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8541, |
|
"eval_samples_per_second": 18.79, |
|
"eval_steps_per_second": 3.246, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 89.97, |
|
"eval_gen_len": 9.9091, |
|
"eval_loss": 1.2272734642028809, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8679, |
|
"eval_samples_per_second": 18.746, |
|
"eval_steps_per_second": 3.238, |
|
"step": 1642 |
|
}, |
|
{ |
|
"epoch": 90.96, |
|
"eval_gen_len": 9.9091, |
|
"eval_loss": 1.2162261009216309, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8611, |
|
"eval_samples_per_second": 18.768, |
|
"eval_steps_per_second": 3.242, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_gen_len": 9.9273, |
|
"eval_loss": 1.2035719156265259, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8669, |
|
"eval_samples_per_second": 18.749, |
|
"eval_steps_per_second": 3.239, |
|
"step": 1679 |
|
}, |
|
{ |
|
"epoch": 92.99, |
|
"eval_gen_len": 9.9727, |
|
"eval_loss": 1.1865819692611694, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8764, |
|
"eval_samples_per_second": 18.719, |
|
"eval_steps_per_second": 3.233, |
|
"step": 1697 |
|
}, |
|
{ |
|
"epoch": 93.97, |
|
"eval_gen_len": 9.9273, |
|
"eval_loss": 1.1713649034500122, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8739, |
|
"eval_samples_per_second": 18.727, |
|
"eval_steps_per_second": 3.235, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 94.96, |
|
"eval_gen_len": 9.8818, |
|
"eval_loss": 1.1566345691680908, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8775, |
|
"eval_samples_per_second": 18.715, |
|
"eval_steps_per_second": 3.233, |
|
"step": 1733 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_gen_len": 9.6818, |
|
"eval_loss": 1.141555905342102, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8697, |
|
"eval_samples_per_second": 18.74, |
|
"eval_steps_per_second": 3.237, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 96.99, |
|
"eval_gen_len": 9.5, |
|
"eval_loss": 1.1269311904907227, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8514, |
|
"eval_samples_per_second": 18.799, |
|
"eval_steps_per_second": 3.247, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 97.97, |
|
"eval_gen_len": 9.6545, |
|
"eval_loss": 1.1134684085845947, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8582, |
|
"eval_samples_per_second": 18.777, |
|
"eval_steps_per_second": 3.243, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 98.96, |
|
"eval_gen_len": 9.7, |
|
"eval_loss": 1.099327564239502, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8567, |
|
"eval_samples_per_second": 18.782, |
|
"eval_steps_per_second": 3.244, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_gen_len": 9.7909, |
|
"eval_loss": 1.0843485593795776, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8693, |
|
"eval_samples_per_second": 18.741, |
|
"eval_steps_per_second": 3.237, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 100.99, |
|
"eval_gen_len": 9.8909, |
|
"eval_loss": 1.0678842067718506, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8524, |
|
"eval_samples_per_second": 18.796, |
|
"eval_steps_per_second": 3.247, |
|
"step": 1843 |
|
}, |
|
{ |
|
"epoch": 101.97, |
|
"eval_gen_len": 9.8636, |
|
"eval_loss": 1.0531669855117798, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8548, |
|
"eval_samples_per_second": 18.788, |
|
"eval_steps_per_second": 3.245, |
|
"step": 1861 |
|
}, |
|
{ |
|
"epoch": 102.96, |
|
"eval_gen_len": 9.8364, |
|
"eval_loss": 1.0373491048812866, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8537, |
|
"eval_samples_per_second": 18.792, |
|
"eval_steps_per_second": 3.246, |
|
"step": 1879 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_gen_len": 9.8636, |
|
"eval_loss": 1.0185768604278564, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8656, |
|
"eval_samples_per_second": 18.754, |
|
"eval_steps_per_second": 3.239, |
|
"step": 1898 |
|
}, |
|
{ |
|
"epoch": 104.99, |
|
"eval_gen_len": 9.9091, |
|
"eval_loss": 1.0003846883773804, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8601, |
|
"eval_samples_per_second": 18.771, |
|
"eval_steps_per_second": 3.242, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 105.97, |
|
"eval_gen_len": 9.9364, |
|
"eval_loss": 0.9851242899894714, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8522, |
|
"eval_samples_per_second": 18.796, |
|
"eval_steps_per_second": 3.247, |
|
"step": 1934 |
|
}, |
|
{ |
|
"epoch": 106.96, |
|
"eval_gen_len": 9.9545, |
|
"eval_loss": 0.9700939059257507, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8548, |
|
"eval_samples_per_second": 18.788, |
|
"eval_steps_per_second": 3.245, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_gen_len": 9.9182, |
|
"eval_loss": 0.9482754468917847, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8554, |
|
"eval_samples_per_second": 18.786, |
|
"eval_steps_per_second": 3.245, |
|
"step": 1971 |
|
}, |
|
{ |
|
"epoch": 108.99, |
|
"eval_gen_len": 9.9273, |
|
"eval_loss": 0.9286762475967407, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8532, |
|
"eval_samples_per_second": 18.793, |
|
"eval_steps_per_second": 3.246, |
|
"step": 1989 |
|
}, |
|
{ |
|
"epoch": 109.59, |
|
"grad_norm": 1.9006233215332031, |
|
"learning_rate": 1.5566666666666668e-05, |
|
"loss": 1.4343, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 109.97, |
|
"eval_gen_len": 9.8364, |
|
"eval_loss": 0.906920850276947, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8648, |
|
"eval_samples_per_second": 18.756, |
|
"eval_steps_per_second": 3.24, |
|
"step": 2007 |
|
}, |
|
{ |
|
"epoch": 110.96, |
|
"eval_gen_len": 9.9273, |
|
"eval_loss": 0.8903268575668335, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8436, |
|
"eval_samples_per_second": 18.824, |
|
"eval_steps_per_second": 3.251, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_gen_len": 9.8818, |
|
"eval_loss": 0.8693720102310181, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8617, |
|
"eval_samples_per_second": 18.766, |
|
"eval_steps_per_second": 3.241, |
|
"step": 2044 |
|
}, |
|
{ |
|
"epoch": 112.99, |
|
"eval_gen_len": 9.8182, |
|
"eval_loss": 0.8487831354141235, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8484, |
|
"eval_samples_per_second": 18.808, |
|
"eval_steps_per_second": 3.249, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 113.97, |
|
"eval_gen_len": 9.8, |
|
"eval_loss": 0.8259497284889221, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8508, |
|
"eval_samples_per_second": 18.801, |
|
"eval_steps_per_second": 3.247, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 114.96, |
|
"eval_gen_len": 9.7818, |
|
"eval_loss": 0.8031529784202576, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8573, |
|
"eval_samples_per_second": 18.78, |
|
"eval_steps_per_second": 3.244, |
|
"step": 2098 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_gen_len": 9.7273, |
|
"eval_loss": 0.7789543271064758, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8581, |
|
"eval_samples_per_second": 18.777, |
|
"eval_steps_per_second": 3.243, |
|
"step": 2117 |
|
}, |
|
{ |
|
"epoch": 116.99, |
|
"eval_gen_len": 9.5909, |
|
"eval_loss": 0.7579861283302307, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8897, |
|
"eval_samples_per_second": 18.677, |
|
"eval_steps_per_second": 3.226, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 117.97, |
|
"eval_gen_len": 9.7273, |
|
"eval_loss": 0.7319938540458679, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8872, |
|
"eval_samples_per_second": 18.684, |
|
"eval_steps_per_second": 3.227, |
|
"step": 2153 |
|
}, |
|
{ |
|
"epoch": 118.96, |
|
"eval_gen_len": 9.7455, |
|
"eval_loss": 0.710875928401947, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8965, |
|
"eval_samples_per_second": 18.655, |
|
"eval_steps_per_second": 3.222, |
|
"step": 2171 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_gen_len": 9.8182, |
|
"eval_loss": 0.6845319271087646, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8634, |
|
"eval_samples_per_second": 18.76, |
|
"eval_steps_per_second": 3.24, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 120.99, |
|
"eval_gen_len": 9.8455, |
|
"eval_loss": 0.6633948683738708, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8707, |
|
"eval_samples_per_second": 18.737, |
|
"eval_steps_per_second": 3.236, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 121.97, |
|
"eval_gen_len": 9.8364, |
|
"eval_loss": 0.6423484683036804, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8549, |
|
"eval_samples_per_second": 18.788, |
|
"eval_steps_per_second": 3.245, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 122.96, |
|
"eval_gen_len": 9.8273, |
|
"eval_loss": 0.6260173916816711, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8887, |
|
"eval_samples_per_second": 18.68, |
|
"eval_steps_per_second": 3.227, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_gen_len": 9.7636, |
|
"eval_loss": 0.604343056678772, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8959, |
|
"eval_samples_per_second": 18.657, |
|
"eval_steps_per_second": 3.223, |
|
"step": 2263 |
|
}, |
|
{ |
|
"epoch": 124.99, |
|
"eval_gen_len": 9.7273, |
|
"eval_loss": 0.5872541666030884, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8541, |
|
"eval_samples_per_second": 18.79, |
|
"eval_steps_per_second": 3.246, |
|
"step": 2281 |
|
}, |
|
{ |
|
"epoch": 125.97, |
|
"eval_gen_len": 9.7818, |
|
"eval_loss": 0.5708852410316467, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8564, |
|
"eval_samples_per_second": 18.783, |
|
"eval_steps_per_second": 3.244, |
|
"step": 2299 |
|
}, |
|
{ |
|
"epoch": 126.96, |
|
"eval_gen_len": 9.8364, |
|
"eval_loss": 0.5527102947235107, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.854, |
|
"eval_samples_per_second": 18.79, |
|
"eval_steps_per_second": 3.246, |
|
"step": 2317 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_gen_len": 9.7545, |
|
"eval_loss": 0.537294864654541, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8561, |
|
"eval_samples_per_second": 18.784, |
|
"eval_steps_per_second": 3.244, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 128.99, |
|
"eval_gen_len": 9.5636, |
|
"eval_loss": 0.5231500864028931, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8429, |
|
"eval_samples_per_second": 18.826, |
|
"eval_steps_per_second": 3.252, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 129.97, |
|
"eval_gen_len": 9.7091, |
|
"eval_loss": 0.5122325420379639, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8959, |
|
"eval_samples_per_second": 18.657, |
|
"eval_steps_per_second": 3.223, |
|
"step": 2372 |
|
}, |
|
{ |
|
"epoch": 130.96, |
|
"eval_gen_len": 9.6, |
|
"eval_loss": 0.501021683216095, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8801, |
|
"eval_samples_per_second": 18.707, |
|
"eval_steps_per_second": 3.231, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_gen_len": 9.7, |
|
"eval_loss": 0.4930221140384674, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8944, |
|
"eval_samples_per_second": 18.662, |
|
"eval_steps_per_second": 3.223, |
|
"step": 2409 |
|
}, |
|
{ |
|
"epoch": 132.99, |
|
"eval_gen_len": 9.5909, |
|
"eval_loss": 0.48477092385292053, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8544, |
|
"eval_samples_per_second": 18.789, |
|
"eval_steps_per_second": 3.245, |
|
"step": 2427 |
|
}, |
|
{ |
|
"epoch": 133.97, |
|
"eval_gen_len": 9.4818, |
|
"eval_loss": 0.47620585560798645, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8504, |
|
"eval_samples_per_second": 18.802, |
|
"eval_steps_per_second": 3.248, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 134.96, |
|
"eval_gen_len": 9.4727, |
|
"eval_loss": 0.4678414463996887, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8619, |
|
"eval_samples_per_second": 18.765, |
|
"eval_steps_per_second": 3.241, |
|
"step": 2463 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_gen_len": 8.9727, |
|
"eval_loss": 0.46089962124824524, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9034, |
|
"eval_samples_per_second": 18.633, |
|
"eval_steps_per_second": 3.219, |
|
"step": 2482 |
|
}, |
|
{ |
|
"epoch": 136.99, |
|
"grad_norm": 0.9683671593666077, |
|
"learning_rate": 1.4455555555555555e-05, |
|
"loss": 0.904, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 136.99, |
|
"eval_gen_len": 8.8182, |
|
"eval_loss": 0.45609110593795776, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8499, |
|
"eval_samples_per_second": 18.804, |
|
"eval_steps_per_second": 3.248, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 137.97, |
|
"eval_gen_len": 8.8, |
|
"eval_loss": 0.448975533246994, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8901, |
|
"eval_samples_per_second": 18.676, |
|
"eval_steps_per_second": 3.226, |
|
"step": 2518 |
|
}, |
|
{ |
|
"epoch": 138.96, |
|
"eval_gen_len": 8.7091, |
|
"eval_loss": 0.44343459606170654, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8897, |
|
"eval_samples_per_second": 18.677, |
|
"eval_steps_per_second": 3.226, |
|
"step": 2536 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_gen_len": 8.6818, |
|
"eval_loss": 0.4378666877746582, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9063, |
|
"eval_samples_per_second": 18.624, |
|
"eval_steps_per_second": 3.217, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 140.99, |
|
"eval_gen_len": 8.1182, |
|
"eval_loss": 0.4321661591529846, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8879, |
|
"eval_samples_per_second": 18.682, |
|
"eval_steps_per_second": 3.227, |
|
"step": 2573 |
|
}, |
|
{ |
|
"epoch": 141.97, |
|
"eval_gen_len": 8.0455, |
|
"eval_loss": 0.42785531282424927, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8597, |
|
"eval_samples_per_second": 18.772, |
|
"eval_steps_per_second": 3.242, |
|
"step": 2591 |
|
}, |
|
{ |
|
"epoch": 142.96, |
|
"eval_gen_len": 7.8909, |
|
"eval_loss": 0.42234906554222107, |
|
"eval_rouge1": 0.0026, |
|
"eval_rouge2": 0.002, |
|
"eval_rougeL": 0.0021, |
|
"eval_rougeLsum": 0.0021, |
|
"eval_runtime": 5.8591, |
|
"eval_samples_per_second": 18.774, |
|
"eval_steps_per_second": 3.243, |
|
"step": 2609 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_gen_len": 7.5727, |
|
"eval_loss": 0.4168229401111603, |
|
"eval_rouge1": 0.0026, |
|
"eval_rouge2": 0.002, |
|
"eval_rougeL": 0.0021, |
|
"eval_rougeLsum": 0.0021, |
|
"eval_runtime": 5.862, |
|
"eval_samples_per_second": 18.765, |
|
"eval_steps_per_second": 3.241, |
|
"step": 2628 |
|
}, |
|
{ |
|
"epoch": 144.99, |
|
"eval_gen_len": 7.4182, |
|
"eval_loss": 0.4125988185405731, |
|
"eval_rouge1": 0.0026, |
|
"eval_rouge2": 0.002, |
|
"eval_rougeL": 0.0021, |
|
"eval_rougeLsum": 0.0021, |
|
"eval_runtime": 5.855, |
|
"eval_samples_per_second": 18.787, |
|
"eval_steps_per_second": 3.245, |
|
"step": 2646 |
|
}, |
|
{ |
|
"epoch": 145.97, |
|
"eval_gen_len": 6.8545, |
|
"eval_loss": 0.40698733925819397, |
|
"eval_rouge1": 0.0026, |
|
"eval_rouge2": 0.002, |
|
"eval_rougeL": 0.0021, |
|
"eval_rougeLsum": 0.0021, |
|
"eval_runtime": 5.8558, |
|
"eval_samples_per_second": 18.785, |
|
"eval_steps_per_second": 3.245, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 146.96, |
|
"eval_gen_len": 6.6, |
|
"eval_loss": 0.4030015468597412, |
|
"eval_rouge1": 0.0026, |
|
"eval_rouge2": 0.002, |
|
"eval_rougeL": 0.0021, |
|
"eval_rougeLsum": 0.0021, |
|
"eval_runtime": 5.8557, |
|
"eval_samples_per_second": 18.785, |
|
"eval_steps_per_second": 3.245, |
|
"step": 2682 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_gen_len": 6.5273, |
|
"eval_loss": 0.39865967631340027, |
|
"eval_rouge1": 0.0044, |
|
"eval_rouge2": 0.0035, |
|
"eval_rougeL": 0.0039, |
|
"eval_rougeLsum": 0.0031, |
|
"eval_runtime": 5.863, |
|
"eval_samples_per_second": 18.762, |
|
"eval_steps_per_second": 3.241, |
|
"step": 2701 |
|
}, |
|
{ |
|
"epoch": 148.99, |
|
"eval_gen_len": 6.5273, |
|
"eval_loss": 0.3959006071090698, |
|
"eval_rouge1": 0.0041, |
|
"eval_rouge2": 0.0035, |
|
"eval_rougeL": 0.0041, |
|
"eval_rougeLsum": 0.0035, |
|
"eval_runtime": 5.857, |
|
"eval_samples_per_second": 18.781, |
|
"eval_steps_per_second": 3.244, |
|
"step": 2719 |
|
}, |
|
{ |
|
"epoch": 149.97, |
|
"eval_gen_len": 6.3, |
|
"eval_loss": 0.3902026414871216, |
|
"eval_rouge1": 0.0049, |
|
"eval_rouge2": 0.0041, |
|
"eval_rougeL": 0.004, |
|
"eval_rougeLsum": 0.0033, |
|
"eval_runtime": 5.855, |
|
"eval_samples_per_second": 18.787, |
|
"eval_steps_per_second": 3.245, |
|
"step": 2737 |
|
}, |
|
{ |
|
"epoch": 150.96, |
|
"eval_gen_len": 6.1909, |
|
"eval_loss": 0.3883272707462311, |
|
"eval_rouge1": 0.0049, |
|
"eval_rouge2": 0.0041, |
|
"eval_rougeL": 0.004, |
|
"eval_rougeLsum": 0.0033, |
|
"eval_runtime": 5.853, |
|
"eval_samples_per_second": 18.794, |
|
"eval_steps_per_second": 3.246, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_gen_len": 6.1182, |
|
"eval_loss": 0.38500654697418213, |
|
"eval_rouge1": 0.006, |
|
"eval_rouge2": 0.0046, |
|
"eval_rougeL": 0.0058, |
|
"eval_rougeLsum": 0.0049, |
|
"eval_runtime": 5.8626, |
|
"eval_samples_per_second": 18.763, |
|
"eval_steps_per_second": 3.241, |
|
"step": 2774 |
|
}, |
|
{ |
|
"epoch": 152.99, |
|
"eval_gen_len": 6.3818, |
|
"eval_loss": 0.38074272871017456, |
|
"eval_rouge1": 0.0065, |
|
"eval_rouge2": 0.0043, |
|
"eval_rougeL": 0.0058, |
|
"eval_rougeLsum": 0.0058, |
|
"eval_runtime": 5.8533, |
|
"eval_samples_per_second": 18.793, |
|
"eval_steps_per_second": 3.246, |
|
"step": 2792 |
|
}, |
|
{ |
|
"epoch": 153.97, |
|
"eval_gen_len": 6.2, |
|
"eval_loss": 0.3779795467853546, |
|
"eval_rouge1": 0.0045, |
|
"eval_rouge2": 0.0022, |
|
"eval_rougeL": 0.0039, |
|
"eval_rougeLsum": 0.0038, |
|
"eval_runtime": 5.8616, |
|
"eval_samples_per_second": 18.766, |
|
"eval_steps_per_second": 3.241, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 154.96, |
|
"eval_gen_len": 6.3636, |
|
"eval_loss": 0.37300601601600647, |
|
"eval_rouge1": 0.0136, |
|
"eval_rouge2": 0.0099, |
|
"eval_rougeL": 0.0122, |
|
"eval_rougeLsum": 0.0117, |
|
"eval_runtime": 5.8638, |
|
"eval_samples_per_second": 18.759, |
|
"eval_steps_per_second": 3.24, |
|
"step": 2828 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_gen_len": 6.4909, |
|
"eval_loss": 0.36999648809432983, |
|
"eval_rouge1": 0.0195, |
|
"eval_rouge2": 0.0152, |
|
"eval_rougeL": 0.0179, |
|
"eval_rougeLsum": 0.0173, |
|
"eval_runtime": 5.8779, |
|
"eval_samples_per_second": 18.714, |
|
"eval_steps_per_second": 3.232, |
|
"step": 2847 |
|
}, |
|
{ |
|
"epoch": 156.99, |
|
"eval_gen_len": 6.5455, |
|
"eval_loss": 0.3666529059410095, |
|
"eval_rouge1": 0.0193, |
|
"eval_rouge2": 0.0135, |
|
"eval_rougeL": 0.0174, |
|
"eval_rougeLsum": 0.0171, |
|
"eval_runtime": 5.8596, |
|
"eval_samples_per_second": 18.773, |
|
"eval_steps_per_second": 3.243, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 157.97, |
|
"eval_gen_len": 6.1909, |
|
"eval_loss": 0.3615466058254242, |
|
"eval_rouge1": 0.019, |
|
"eval_rouge2": 0.0133, |
|
"eval_rougeL": 0.0169, |
|
"eval_rougeLsum": 0.0168, |
|
"eval_runtime": 5.8587, |
|
"eval_samples_per_second": 18.776, |
|
"eval_steps_per_second": 3.243, |
|
"step": 2883 |
|
}, |
|
{ |
|
"epoch": 158.96, |
|
"eval_gen_len": 7.5, |
|
"eval_loss": 0.3599731922149658, |
|
"eval_rouge1": 0.0283, |
|
"eval_rouge2": 0.0195, |
|
"eval_rougeL": 0.0269, |
|
"eval_rougeLsum": 0.0268, |
|
"eval_runtime": 5.8597, |
|
"eval_samples_per_second": 18.772, |
|
"eval_steps_per_second": 3.242, |
|
"step": 2901 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_gen_len": 6.8, |
|
"eval_loss": 0.3567432165145874, |
|
"eval_rouge1": 0.0241, |
|
"eval_rouge2": 0.0168, |
|
"eval_rougeL": 0.022, |
|
"eval_rougeLsum": 0.0216, |
|
"eval_runtime": 5.8734, |
|
"eval_samples_per_second": 18.729, |
|
"eval_steps_per_second": 3.235, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 160.99, |
|
"eval_gen_len": 6.6, |
|
"eval_loss": 0.35366886854171753, |
|
"eval_rouge1": 0.021, |
|
"eval_rouge2": 0.0135, |
|
"eval_rougeL": 0.0189, |
|
"eval_rougeLsum": 0.0184, |
|
"eval_runtime": 5.8665, |
|
"eval_samples_per_second": 18.751, |
|
"eval_steps_per_second": 3.239, |
|
"step": 2938 |
|
}, |
|
{ |
|
"epoch": 161.97, |
|
"eval_gen_len": 8.6818, |
|
"eval_loss": 0.35154151916503906, |
|
"eval_rouge1": 0.0438, |
|
"eval_rouge2": 0.0277, |
|
"eval_rougeL": 0.0401, |
|
"eval_rougeLsum": 0.0401, |
|
"eval_runtime": 5.8694, |
|
"eval_samples_per_second": 18.741, |
|
"eval_steps_per_second": 3.237, |
|
"step": 2956 |
|
}, |
|
{ |
|
"epoch": 162.96, |
|
"eval_gen_len": 8.2636, |
|
"eval_loss": 0.3467026948928833, |
|
"eval_rouge1": 0.0374, |
|
"eval_rouge2": 0.023, |
|
"eval_rougeL": 0.0346, |
|
"eval_rougeLsum": 0.0341, |
|
"eval_runtime": 5.8684, |
|
"eval_samples_per_second": 18.745, |
|
"eval_steps_per_second": 3.238, |
|
"step": 2974 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_gen_len": 7.4636, |
|
"eval_loss": 0.343766987323761, |
|
"eval_rouge1": 0.0284, |
|
"eval_rouge2": 0.0184, |
|
"eval_rougeL": 0.0267, |
|
"eval_rougeLsum": 0.027, |
|
"eval_runtime": 5.876, |
|
"eval_samples_per_second": 18.72, |
|
"eval_steps_per_second": 3.233, |
|
"step": 2993 |
|
}, |
|
{ |
|
"epoch": 164.38, |
|
"grad_norm": 0.7337635159492493, |
|
"learning_rate": 1.3346666666666667e-05, |
|
"loss": 0.5395, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 164.99, |
|
"eval_gen_len": 8.9909, |
|
"eval_loss": 0.3419288694858551, |
|
"eval_rouge1": 0.0445, |
|
"eval_rouge2": 0.0276, |
|
"eval_rougeL": 0.0414, |
|
"eval_rougeLsum": 0.0408, |
|
"eval_runtime": 5.8707, |
|
"eval_samples_per_second": 18.737, |
|
"eval_steps_per_second": 3.236, |
|
"step": 3011 |
|
}, |
|
{ |
|
"epoch": 165.97, |
|
"eval_gen_len": 8.8909, |
|
"eval_loss": 0.33906012773513794, |
|
"eval_rouge1": 0.0446, |
|
"eval_rouge2": 0.0294, |
|
"eval_rougeL": 0.0415, |
|
"eval_rougeLsum": 0.0412, |
|
"eval_runtime": 5.8733, |
|
"eval_samples_per_second": 18.729, |
|
"eval_steps_per_second": 3.235, |
|
"step": 3029 |
|
}, |
|
{ |
|
"epoch": 166.96, |
|
"eval_gen_len": 9.5455, |
|
"eval_loss": 0.3354407548904419, |
|
"eval_rouge1": 0.0498, |
|
"eval_rouge2": 0.032, |
|
"eval_rougeL": 0.0452, |
|
"eval_rougeLsum": 0.0453, |
|
"eval_runtime": 5.8714, |
|
"eval_samples_per_second": 18.735, |
|
"eval_steps_per_second": 3.236, |
|
"step": 3047 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_gen_len": 10.0818, |
|
"eval_loss": 0.33422771096229553, |
|
"eval_rouge1": 0.0579, |
|
"eval_rouge2": 0.0388, |
|
"eval_rougeL": 0.0549, |
|
"eval_rougeLsum": 0.0553, |
|
"eval_runtime": 5.8768, |
|
"eval_samples_per_second": 18.718, |
|
"eval_steps_per_second": 3.233, |
|
"step": 3066 |
|
}, |
|
{ |
|
"epoch": 168.99, |
|
"eval_gen_len": 10.2, |
|
"eval_loss": 0.331695020198822, |
|
"eval_rouge1": 0.0542, |
|
"eval_rouge2": 0.0353, |
|
"eval_rougeL": 0.0508, |
|
"eval_rougeLsum": 0.051, |
|
"eval_runtime": 5.8711, |
|
"eval_samples_per_second": 18.736, |
|
"eval_steps_per_second": 3.236, |
|
"step": 3084 |
|
}, |
|
{ |
|
"epoch": 169.97, |
|
"eval_gen_len": 10.1364, |
|
"eval_loss": 0.3284001052379608, |
|
"eval_rouge1": 0.0555, |
|
"eval_rouge2": 0.0363, |
|
"eval_rougeL": 0.0523, |
|
"eval_rougeLsum": 0.0521, |
|
"eval_runtime": 5.8689, |
|
"eval_samples_per_second": 18.743, |
|
"eval_steps_per_second": 3.237, |
|
"step": 3102 |
|
}, |
|
{ |
|
"epoch": 170.96, |
|
"eval_gen_len": 10.6727, |
|
"eval_loss": 0.32654786109924316, |
|
"eval_rouge1": 0.0562, |
|
"eval_rouge2": 0.0353, |
|
"eval_rougeL": 0.0519, |
|
"eval_rougeLsum": 0.0521, |
|
"eval_runtime": 5.8762, |
|
"eval_samples_per_second": 18.72, |
|
"eval_steps_per_second": 3.233, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_gen_len": 10.7818, |
|
"eval_loss": 0.3229809105396271, |
|
"eval_rouge1": 0.0551, |
|
"eval_rouge2": 0.0346, |
|
"eval_rougeL": 0.0523, |
|
"eval_rougeLsum": 0.0525, |
|
"eval_runtime": 5.8825, |
|
"eval_samples_per_second": 18.699, |
|
"eval_steps_per_second": 3.23, |
|
"step": 3139 |
|
}, |
|
{ |
|
"epoch": 172.99, |
|
"eval_gen_len": 11.3727, |
|
"eval_loss": 0.3223954439163208, |
|
"eval_rouge1": 0.0614, |
|
"eval_rouge2": 0.0388, |
|
"eval_rougeL": 0.0579, |
|
"eval_rougeLsum": 0.0585, |
|
"eval_runtime": 5.8822, |
|
"eval_samples_per_second": 18.701, |
|
"eval_steps_per_second": 3.23, |
|
"step": 3157 |
|
}, |
|
{ |
|
"epoch": 173.97, |
|
"eval_gen_len": 11.2909, |
|
"eval_loss": 0.31792977452278137, |
|
"eval_rouge1": 0.0584, |
|
"eval_rouge2": 0.0328, |
|
"eval_rougeL": 0.055, |
|
"eval_rougeLsum": 0.0553, |
|
"eval_runtime": 5.8754, |
|
"eval_samples_per_second": 18.722, |
|
"eval_steps_per_second": 3.234, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 174.96, |
|
"eval_gen_len": 12.2455, |
|
"eval_loss": 0.31657084822654724, |
|
"eval_rouge1": 0.0649, |
|
"eval_rouge2": 0.0392, |
|
"eval_rougeL": 0.0615, |
|
"eval_rougeLsum": 0.0619, |
|
"eval_runtime": 5.9198, |
|
"eval_samples_per_second": 18.582, |
|
"eval_steps_per_second": 3.21, |
|
"step": 3193 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_gen_len": 11.7545, |
|
"eval_loss": 0.3131771981716156, |
|
"eval_rouge1": 0.0605, |
|
"eval_rouge2": 0.0341, |
|
"eval_rougeL": 0.0568, |
|
"eval_rougeLsum": 0.0571, |
|
"eval_runtime": 5.8941, |
|
"eval_samples_per_second": 18.663, |
|
"eval_steps_per_second": 3.224, |
|
"step": 3212 |
|
}, |
|
{ |
|
"epoch": 176.99, |
|
"eval_gen_len": 12.2364, |
|
"eval_loss": 0.3109656870365143, |
|
"eval_rouge1": 0.0639, |
|
"eval_rouge2": 0.0396, |
|
"eval_rougeL": 0.0614, |
|
"eval_rougeLsum": 0.0619, |
|
"eval_runtime": 5.8859, |
|
"eval_samples_per_second": 18.689, |
|
"eval_steps_per_second": 3.228, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 177.97, |
|
"eval_gen_len": 11.9273, |
|
"eval_loss": 0.3089582324028015, |
|
"eval_rouge1": 0.0664, |
|
"eval_rouge2": 0.04, |
|
"eval_rougeL": 0.0619, |
|
"eval_rougeLsum": 0.0626, |
|
"eval_runtime": 5.873, |
|
"eval_samples_per_second": 18.73, |
|
"eval_steps_per_second": 3.235, |
|
"step": 3248 |
|
}, |
|
{ |
|
"epoch": 178.96, |
|
"eval_gen_len": 12.3364, |
|
"eval_loss": 0.30663853883743286, |
|
"eval_rouge1": 0.0721, |
|
"eval_rouge2": 0.0455, |
|
"eval_rougeL": 0.0685, |
|
"eval_rougeLsum": 0.0688, |
|
"eval_runtime": 5.8871, |
|
"eval_samples_per_second": 18.685, |
|
"eval_steps_per_second": 3.227, |
|
"step": 3266 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_gen_len": 12.6, |
|
"eval_loss": 0.3049904704093933, |
|
"eval_rouge1": 0.0727, |
|
"eval_rouge2": 0.0447, |
|
"eval_rougeL": 0.0691, |
|
"eval_rougeLsum": 0.0693, |
|
"eval_runtime": 5.8952, |
|
"eval_samples_per_second": 18.659, |
|
"eval_steps_per_second": 3.223, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 180.99, |
|
"eval_gen_len": 12.2636, |
|
"eval_loss": 0.30238986015319824, |
|
"eval_rouge1": 0.0716, |
|
"eval_rouge2": 0.0436, |
|
"eval_rougeL": 0.0684, |
|
"eval_rougeLsum": 0.0688, |
|
"eval_runtime": 5.8902, |
|
"eval_samples_per_second": 18.675, |
|
"eval_steps_per_second": 3.226, |
|
"step": 3303 |
|
}, |
|
{ |
|
"epoch": 181.97, |
|
"eval_gen_len": 15.2, |
|
"eval_loss": 0.29928120970726013, |
|
"eval_rouge1": 0.0901, |
|
"eval_rouge2": 0.0567, |
|
"eval_rougeL": 0.0848, |
|
"eval_rougeLsum": 0.0851, |
|
"eval_runtime": 5.9062, |
|
"eval_samples_per_second": 18.624, |
|
"eval_steps_per_second": 3.217, |
|
"step": 3321 |
|
}, |
|
{ |
|
"epoch": 182.96, |
|
"eval_gen_len": 13.8182, |
|
"eval_loss": 0.2973878085613251, |
|
"eval_rouge1": 0.0874, |
|
"eval_rouge2": 0.0583, |
|
"eval_rougeL": 0.084, |
|
"eval_rougeLsum": 0.0838, |
|
"eval_runtime": 5.892, |
|
"eval_samples_per_second": 18.67, |
|
"eval_steps_per_second": 3.225, |
|
"step": 3339 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_gen_len": 14.8091, |
|
"eval_loss": 0.29529863595962524, |
|
"eval_rouge1": 0.0924, |
|
"eval_rouge2": 0.0616, |
|
"eval_rougeL": 0.0892, |
|
"eval_rougeLsum": 0.0893, |
|
"eval_runtime": 5.9216, |
|
"eval_samples_per_second": 18.576, |
|
"eval_steps_per_second": 3.209, |
|
"step": 3358 |
|
}, |
|
{ |
|
"epoch": 184.99, |
|
"eval_gen_len": 16.4, |
|
"eval_loss": 0.29348161816596985, |
|
"eval_rouge1": 0.1029, |
|
"eval_rouge2": 0.0663, |
|
"eval_rougeL": 0.0987, |
|
"eval_rougeLsum": 0.0984, |
|
"eval_runtime": 5.8929, |
|
"eval_samples_per_second": 18.666, |
|
"eval_steps_per_second": 3.224, |
|
"step": 3376 |
|
}, |
|
{ |
|
"epoch": 185.97, |
|
"eval_gen_len": 15.0364, |
|
"eval_loss": 0.2902657389640808, |
|
"eval_rouge1": 0.0923, |
|
"eval_rouge2": 0.0603, |
|
"eval_rougeL": 0.089, |
|
"eval_rougeLsum": 0.0887, |
|
"eval_runtime": 5.8988, |
|
"eval_samples_per_second": 18.648, |
|
"eval_steps_per_second": 3.221, |
|
"step": 3394 |
|
}, |
|
{ |
|
"epoch": 186.96, |
|
"eval_gen_len": 16.7, |
|
"eval_loss": 0.2885950207710266, |
|
"eval_rouge1": 0.1027, |
|
"eval_rouge2": 0.0661, |
|
"eval_rougeL": 0.0985, |
|
"eval_rougeLsum": 0.099, |
|
"eval_runtime": 5.9087, |
|
"eval_samples_per_second": 18.617, |
|
"eval_steps_per_second": 3.216, |
|
"step": 3412 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_gen_len": 15.0455, |
|
"eval_loss": 0.2858668267726898, |
|
"eval_rouge1": 0.0998, |
|
"eval_rouge2": 0.0686, |
|
"eval_rougeL": 0.0979, |
|
"eval_rougeLsum": 0.0973, |
|
"eval_runtime": 5.9128, |
|
"eval_samples_per_second": 18.604, |
|
"eval_steps_per_second": 3.213, |
|
"step": 3431 |
|
}, |
|
{ |
|
"epoch": 188.99, |
|
"eval_gen_len": 16.7, |
|
"eval_loss": 0.28371575474739075, |
|
"eval_rouge1": 0.1081, |
|
"eval_rouge2": 0.0733, |
|
"eval_rougeL": 0.105, |
|
"eval_rougeLsum": 0.1051, |
|
"eval_runtime": 5.9179, |
|
"eval_samples_per_second": 18.588, |
|
"eval_steps_per_second": 3.211, |
|
"step": 3449 |
|
}, |
|
{ |
|
"epoch": 189.97, |
|
"eval_gen_len": 16.9364, |
|
"eval_loss": 0.28239956498146057, |
|
"eval_rouge1": 0.1176, |
|
"eval_rouge2": 0.0809, |
|
"eval_rougeL": 0.1142, |
|
"eval_rougeLsum": 0.1136, |
|
"eval_runtime": 5.9392, |
|
"eval_samples_per_second": 18.521, |
|
"eval_steps_per_second": 3.199, |
|
"step": 3467 |
|
}, |
|
{ |
|
"epoch": 190.96, |
|
"eval_gen_len": 17.1545, |
|
"eval_loss": 0.28112414479255676, |
|
"eval_rouge1": 0.1281, |
|
"eval_rouge2": 0.0923, |
|
"eval_rougeL": 0.1251, |
|
"eval_rougeLsum": 0.1247, |
|
"eval_runtime": 5.9439, |
|
"eval_samples_per_second": 18.506, |
|
"eval_steps_per_second": 3.197, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 191.78, |
|
"grad_norm": 0.6559975743293762, |
|
"learning_rate": 1.2235555555555556e-05, |
|
"loss": 0.4165, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_gen_len": 16.9909, |
|
"eval_loss": 0.2794179916381836, |
|
"eval_rouge1": 0.1326, |
|
"eval_rouge2": 0.0983, |
|
"eval_rougeL": 0.1308, |
|
"eval_rougeLsum": 0.1303, |
|
"eval_runtime": 5.9583, |
|
"eval_samples_per_second": 18.462, |
|
"eval_steps_per_second": 3.189, |
|
"step": 3504 |
|
}, |
|
{ |
|
"epoch": 192.99, |
|
"eval_gen_len": 17.2818, |
|
"eval_loss": 0.2769763767719269, |
|
"eval_rouge1": 0.1377, |
|
"eval_rouge2": 0.1021, |
|
"eval_rougeL": 0.1352, |
|
"eval_rougeLsum": 0.1352, |
|
"eval_runtime": 5.9436, |
|
"eval_samples_per_second": 18.507, |
|
"eval_steps_per_second": 3.197, |
|
"step": 3522 |
|
}, |
|
{ |
|
"epoch": 193.97, |
|
"eval_gen_len": 16.6727, |
|
"eval_loss": 0.27478569746017456, |
|
"eval_rouge1": 0.1421, |
|
"eval_rouge2": 0.1072, |
|
"eval_rougeL": 0.1391, |
|
"eval_rougeLsum": 0.1389, |
|
"eval_runtime": 5.9492, |
|
"eval_samples_per_second": 18.49, |
|
"eval_steps_per_second": 3.194, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 194.96, |
|
"eval_gen_len": 17.6273, |
|
"eval_loss": 0.273343026638031, |
|
"eval_rouge1": 0.1536, |
|
"eval_rouge2": 0.1166, |
|
"eval_rougeL": 0.1499, |
|
"eval_rougeLsum": 0.15, |
|
"eval_runtime": 5.9258, |
|
"eval_samples_per_second": 18.563, |
|
"eval_steps_per_second": 3.206, |
|
"step": 3558 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_gen_len": 17.6182, |
|
"eval_loss": 0.27078884840011597, |
|
"eval_rouge1": 0.1575, |
|
"eval_rouge2": 0.1196, |
|
"eval_rougeL": 0.1531, |
|
"eval_rougeLsum": 0.1529, |
|
"eval_runtime": 5.9196, |
|
"eval_samples_per_second": 18.582, |
|
"eval_steps_per_second": 3.21, |
|
"step": 3577 |
|
}, |
|
{ |
|
"epoch": 196.99, |
|
"eval_gen_len": 18.1727, |
|
"eval_loss": 0.2679530382156372, |
|
"eval_rouge1": 0.1544, |
|
"eval_rouge2": 0.1152, |
|
"eval_rougeL": 0.1492, |
|
"eval_rougeLsum": 0.1492, |
|
"eval_runtime": 5.9115, |
|
"eval_samples_per_second": 18.608, |
|
"eval_steps_per_second": 3.214, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 197.97, |
|
"eval_gen_len": 17.6636, |
|
"eval_loss": 0.26689696311950684, |
|
"eval_rouge1": 0.1615, |
|
"eval_rouge2": 0.1264, |
|
"eval_rougeL": 0.1579, |
|
"eval_rougeLsum": 0.1577, |
|
"eval_runtime": 5.911, |
|
"eval_samples_per_second": 18.609, |
|
"eval_steps_per_second": 3.214, |
|
"step": 3613 |
|
}, |
|
{ |
|
"epoch": 198.96, |
|
"eval_gen_len": 18.3182, |
|
"eval_loss": 0.26582667231559753, |
|
"eval_rouge1": 0.1687, |
|
"eval_rouge2": 0.1322, |
|
"eval_rougeL": 0.165, |
|
"eval_rougeLsum": 0.1646, |
|
"eval_runtime": 5.9133, |
|
"eval_samples_per_second": 18.602, |
|
"eval_steps_per_second": 3.213, |
|
"step": 3631 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_gen_len": 18.3091, |
|
"eval_loss": 0.262999951839447, |
|
"eval_rouge1": 0.1753, |
|
"eval_rouge2": 0.1408, |
|
"eval_rougeL": 0.1726, |
|
"eval_rougeLsum": 0.1721, |
|
"eval_runtime": 5.9621, |
|
"eval_samples_per_second": 18.45, |
|
"eval_steps_per_second": 3.187, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 200.99, |
|
"eval_gen_len": 18.6182, |
|
"eval_loss": 0.2615664601325989, |
|
"eval_rouge1": 0.1803, |
|
"eval_rouge2": 0.1452, |
|
"eval_rougeL": 0.1776, |
|
"eval_rougeLsum": 0.1767, |
|
"eval_runtime": 5.9566, |
|
"eval_samples_per_second": 18.467, |
|
"eval_steps_per_second": 3.19, |
|
"step": 3668 |
|
}, |
|
{ |
|
"epoch": 201.97, |
|
"eval_gen_len": 18.4818, |
|
"eval_loss": 0.26064223051071167, |
|
"eval_rouge1": 0.1883, |
|
"eval_rouge2": 0.1542, |
|
"eval_rougeL": 0.1857, |
|
"eval_rougeLsum": 0.1854, |
|
"eval_runtime": 5.9305, |
|
"eval_samples_per_second": 18.548, |
|
"eval_steps_per_second": 3.204, |
|
"step": 3686 |
|
}, |
|
{ |
|
"epoch": 202.96, |
|
"eval_gen_len": 18.6364, |
|
"eval_loss": 0.2593740224838257, |
|
"eval_rouge1": 0.1822, |
|
"eval_rouge2": 0.1461, |
|
"eval_rougeL": 0.179, |
|
"eval_rougeLsum": 0.1784, |
|
"eval_runtime": 5.9634, |
|
"eval_samples_per_second": 18.446, |
|
"eval_steps_per_second": 3.186, |
|
"step": 3704 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_gen_len": 18.5909, |
|
"eval_loss": 0.2574484348297119, |
|
"eval_rouge1": 0.1785, |
|
"eval_rouge2": 0.1424, |
|
"eval_rougeL": 0.1754, |
|
"eval_rougeLsum": 0.1741, |
|
"eval_runtime": 5.9326, |
|
"eval_samples_per_second": 18.542, |
|
"eval_steps_per_second": 3.203, |
|
"step": 3723 |
|
}, |
|
{ |
|
"epoch": 204.99, |
|
"eval_gen_len": 18.5909, |
|
"eval_loss": 0.255962997674942, |
|
"eval_rouge1": 0.182, |
|
"eval_rouge2": 0.1475, |
|
"eval_rougeL": 0.1799, |
|
"eval_rougeLsum": 0.1789, |
|
"eval_runtime": 5.9645, |
|
"eval_samples_per_second": 18.442, |
|
"eval_steps_per_second": 3.186, |
|
"step": 3741 |
|
}, |
|
{ |
|
"epoch": 205.97, |
|
"eval_gen_len": 18.3818, |
|
"eval_loss": 0.25388580560684204, |
|
"eval_rouge1": 0.1899, |
|
"eval_rouge2": 0.1557, |
|
"eval_rougeL": 0.1862, |
|
"eval_rougeLsum": 0.1861, |
|
"eval_runtime": 5.9231, |
|
"eval_samples_per_second": 18.571, |
|
"eval_steps_per_second": 3.208, |
|
"step": 3759 |
|
}, |
|
{ |
|
"epoch": 206.96, |
|
"eval_gen_len": 18.3727, |
|
"eval_loss": 0.251926988363266, |
|
"eval_rouge1": 0.1962, |
|
"eval_rouge2": 0.1635, |
|
"eval_rougeL": 0.1935, |
|
"eval_rougeLsum": 0.1929, |
|
"eval_runtime": 5.9351, |
|
"eval_samples_per_second": 18.534, |
|
"eval_steps_per_second": 3.201, |
|
"step": 3777 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_gen_len": 18.5818, |
|
"eval_loss": 0.25200676918029785, |
|
"eval_rouge1": 0.2002, |
|
"eval_rouge2": 0.1666, |
|
"eval_rougeL": 0.197, |
|
"eval_rougeLsum": 0.1964, |
|
"eval_runtime": 5.9349, |
|
"eval_samples_per_second": 18.535, |
|
"eval_steps_per_second": 3.201, |
|
"step": 3796 |
|
}, |
|
{ |
|
"epoch": 208.99, |
|
"eval_gen_len": 18.4182, |
|
"eval_loss": 0.24954193830490112, |
|
"eval_rouge1": 0.2016, |
|
"eval_rouge2": 0.1708, |
|
"eval_rougeL": 0.1997, |
|
"eval_rougeLsum": 0.1994, |
|
"eval_runtime": 5.9236, |
|
"eval_samples_per_second": 18.57, |
|
"eval_steps_per_second": 3.208, |
|
"step": 3814 |
|
}, |
|
{ |
|
"epoch": 209.97, |
|
"eval_gen_len": 18.5182, |
|
"eval_loss": 0.2487880438566208, |
|
"eval_rouge1": 0.2029, |
|
"eval_rouge2": 0.172, |
|
"eval_rougeL": 0.2008, |
|
"eval_rougeLsum": 0.2005, |
|
"eval_runtime": 5.9666, |
|
"eval_samples_per_second": 18.436, |
|
"eval_steps_per_second": 3.184, |
|
"step": 3832 |
|
}, |
|
{ |
|
"epoch": 210.96, |
|
"eval_gen_len": 18.7455, |
|
"eval_loss": 0.246944397687912, |
|
"eval_rouge1": 0.2126, |
|
"eval_rouge2": 0.183, |
|
"eval_rougeL": 0.2107, |
|
"eval_rougeLsum": 0.2102, |
|
"eval_runtime": 5.9334, |
|
"eval_samples_per_second": 18.539, |
|
"eval_steps_per_second": 3.202, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_gen_len": 18.7455, |
|
"eval_loss": 0.24503479897975922, |
|
"eval_rouge1": 0.2153, |
|
"eval_rouge2": 0.1832, |
|
"eval_rougeL": 0.213, |
|
"eval_rougeLsum": 0.2126, |
|
"eval_runtime": 5.9384, |
|
"eval_samples_per_second": 18.524, |
|
"eval_steps_per_second": 3.2, |
|
"step": 3869 |
|
}, |
|
{ |
|
"epoch": 212.99, |
|
"eval_gen_len": 18.8, |
|
"eval_loss": 0.24539507925510406, |
|
"eval_rouge1": 0.2199, |
|
"eval_rouge2": 0.1891, |
|
"eval_rougeL": 0.2176, |
|
"eval_rougeLsum": 0.2173, |
|
"eval_runtime": 5.9326, |
|
"eval_samples_per_second": 18.542, |
|
"eval_steps_per_second": 3.203, |
|
"step": 3887 |
|
}, |
|
{ |
|
"epoch": 213.97, |
|
"eval_gen_len": 18.7455, |
|
"eval_loss": 0.24339380860328674, |
|
"eval_rouge1": 0.2253, |
|
"eval_rouge2": 0.1972, |
|
"eval_rougeL": 0.2239, |
|
"eval_rougeLsum": 0.2238, |
|
"eval_runtime": 5.9368, |
|
"eval_samples_per_second": 18.529, |
|
"eval_steps_per_second": 3.2, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 214.96, |
|
"eval_gen_len": 18.8, |
|
"eval_loss": 0.24222548305988312, |
|
"eval_rouge1": 0.2245, |
|
"eval_rouge2": 0.1953, |
|
"eval_rougeL": 0.2234, |
|
"eval_rougeLsum": 0.2227, |
|
"eval_runtime": 5.9303, |
|
"eval_samples_per_second": 18.549, |
|
"eval_steps_per_second": 3.204, |
|
"step": 3923 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_gen_len": 18.8, |
|
"eval_loss": 0.24038065969944, |
|
"eval_rouge1": 0.2269, |
|
"eval_rouge2": 0.1974, |
|
"eval_rougeL": 0.2255, |
|
"eval_rougeLsum": 0.2251, |
|
"eval_runtime": 5.9423, |
|
"eval_samples_per_second": 18.511, |
|
"eval_steps_per_second": 3.197, |
|
"step": 3942 |
|
}, |
|
{ |
|
"epoch": 216.99, |
|
"eval_gen_len": 18.8, |
|
"eval_loss": 0.24085824191570282, |
|
"eval_rouge1": 0.2324, |
|
"eval_rouge2": 0.203, |
|
"eval_rougeL": 0.2305, |
|
"eval_rougeLsum": 0.2302, |
|
"eval_runtime": 5.9374, |
|
"eval_samples_per_second": 18.527, |
|
"eval_steps_per_second": 3.2, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 217.97, |
|
"eval_gen_len": 18.8, |
|
"eval_loss": 0.2390824556350708, |
|
"eval_rouge1": 0.2328, |
|
"eval_rouge2": 0.204, |
|
"eval_rougeL": 0.2309, |
|
"eval_rougeLsum": 0.2307, |
|
"eval_runtime": 5.9716, |
|
"eval_samples_per_second": 18.421, |
|
"eval_steps_per_second": 3.182, |
|
"step": 3978 |
|
}, |
|
{ |
|
"epoch": 218.96, |
|
"eval_gen_len": 18.9636, |
|
"eval_loss": 0.23841167986392975, |
|
"eval_rouge1": 0.2394, |
|
"eval_rouge2": 0.2109, |
|
"eval_rougeL": 0.2381, |
|
"eval_rougeLsum": 0.238, |
|
"eval_runtime": 5.9528, |
|
"eval_samples_per_second": 18.479, |
|
"eval_steps_per_second": 3.192, |
|
"step": 3996 |
|
}, |
|
{ |
|
"epoch": 219.18, |
|
"grad_norm": 1.037503957748413, |
|
"learning_rate": 1.1124444444444444e-05, |
|
"loss": 0.3439, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_gen_len": 18.9636, |
|
"eval_loss": 0.2358517199754715, |
|
"eval_rouge1": 0.2413, |
|
"eval_rouge2": 0.2128, |
|
"eval_rougeL": 0.2404, |
|
"eval_rougeLsum": 0.2403, |
|
"eval_runtime": 5.9869, |
|
"eval_samples_per_second": 18.373, |
|
"eval_steps_per_second": 3.174, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 220.99, |
|
"eval_gen_len": 18.9636, |
|
"eval_loss": 0.2353217452764511, |
|
"eval_rouge1": 0.2411, |
|
"eval_rouge2": 0.2122, |
|
"eval_rougeL": 0.2401, |
|
"eval_rougeLsum": 0.2398, |
|
"eval_runtime": 5.9744, |
|
"eval_samples_per_second": 18.412, |
|
"eval_steps_per_second": 3.18, |
|
"step": 4033 |
|
}, |
|
{ |
|
"epoch": 221.97, |
|
"eval_gen_len": 18.9636, |
|
"eval_loss": 0.23452366888523102, |
|
"eval_rouge1": 0.2423, |
|
"eval_rouge2": 0.2131, |
|
"eval_rougeL": 0.2414, |
|
"eval_rougeLsum": 0.2409, |
|
"eval_runtime": 5.9345, |
|
"eval_samples_per_second": 18.536, |
|
"eval_steps_per_second": 3.202, |
|
"step": 4051 |
|
}, |
|
{ |
|
"epoch": 222.96, |
|
"eval_gen_len": 18.9636, |
|
"eval_loss": 0.23285672068595886, |
|
"eval_rouge1": 0.2417, |
|
"eval_rouge2": 0.2133, |
|
"eval_rougeL": 0.2407, |
|
"eval_rougeLsum": 0.2404, |
|
"eval_runtime": 5.9399, |
|
"eval_samples_per_second": 18.519, |
|
"eval_steps_per_second": 3.199, |
|
"step": 4069 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_gen_len": 18.9636, |
|
"eval_loss": 0.23224669694900513, |
|
"eval_rouge1": 0.2439, |
|
"eval_rouge2": 0.2166, |
|
"eval_rougeL": 0.2434, |
|
"eval_rougeLsum": 0.2431, |
|
"eval_runtime": 5.9565, |
|
"eval_samples_per_second": 18.467, |
|
"eval_steps_per_second": 3.19, |
|
"step": 4088 |
|
}, |
|
{ |
|
"epoch": 224.99, |
|
"eval_gen_len": 18.9636, |
|
"eval_loss": 0.231593519449234, |
|
"eval_rouge1": 0.2416, |
|
"eval_rouge2": 0.2138, |
|
"eval_rougeL": 0.2413, |
|
"eval_rougeLsum": 0.2409, |
|
"eval_runtime": 5.9359, |
|
"eval_samples_per_second": 18.531, |
|
"eval_steps_per_second": 3.201, |
|
"step": 4106 |
|
}, |
|
{ |
|
"epoch": 225.97, |
|
"eval_gen_len": 18.9636, |
|
"eval_loss": 0.23053088784217834, |
|
"eval_rouge1": 0.2423, |
|
"eval_rouge2": 0.2147, |
|
"eval_rougeL": 0.242, |
|
"eval_rougeLsum": 0.2414, |
|
"eval_runtime": 5.9359, |
|
"eval_samples_per_second": 18.531, |
|
"eval_steps_per_second": 3.201, |
|
"step": 4124 |
|
}, |
|
{ |
|
"epoch": 226.96, |
|
"eval_gen_len": 18.9636, |
|
"eval_loss": 0.22999462485313416, |
|
"eval_rouge1": 0.243, |
|
"eval_rouge2": 0.215, |
|
"eval_rougeL": 0.2427, |
|
"eval_rougeLsum": 0.242, |
|
"eval_runtime": 5.939, |
|
"eval_samples_per_second": 18.522, |
|
"eval_steps_per_second": 3.199, |
|
"step": 4142 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"eval_gen_len": 18.9182, |
|
"eval_loss": 0.22881445288658142, |
|
"eval_rouge1": 0.2472, |
|
"eval_rouge2": 0.2201, |
|
"eval_rougeL": 0.2469, |
|
"eval_rougeLsum": 0.2466, |
|
"eval_runtime": 5.9542, |
|
"eval_samples_per_second": 18.474, |
|
"eval_steps_per_second": 3.191, |
|
"step": 4161 |
|
}, |
|
{ |
|
"epoch": 228.99, |
|
"eval_gen_len": 18.9636, |
|
"eval_loss": 0.2282283455133438, |
|
"eval_rouge1": 0.247, |
|
"eval_rouge2": 0.2195, |
|
"eval_rougeL": 0.2468, |
|
"eval_rougeLsum": 0.2464, |
|
"eval_runtime": 5.9427, |
|
"eval_samples_per_second": 18.51, |
|
"eval_steps_per_second": 3.197, |
|
"step": 4179 |
|
}, |
|
{ |
|
"epoch": 229.97, |
|
"eval_gen_len": 18.9182, |
|
"eval_loss": 0.22686214745044708, |
|
"eval_rouge1": 0.2437, |
|
"eval_rouge2": 0.2163, |
|
"eval_rougeL": 0.2436, |
|
"eval_rougeLsum": 0.2434, |
|
"eval_runtime": 5.9383, |
|
"eval_samples_per_second": 18.524, |
|
"eval_steps_per_second": 3.2, |
|
"step": 4197 |
|
}, |
|
{ |
|
"epoch": 230.96, |
|
"eval_gen_len": 18.9636, |
|
"eval_loss": 0.22540777921676636, |
|
"eval_rouge1": 0.2485, |
|
"eval_rouge2": 0.2218, |
|
"eval_rougeL": 0.2484, |
|
"eval_rougeLsum": 0.2478, |
|
"eval_runtime": 5.9379, |
|
"eval_samples_per_second": 18.525, |
|
"eval_steps_per_second": 3.2, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"eval_gen_len": 18.7455, |
|
"eval_loss": 0.22596728801727295, |
|
"eval_rouge1": 0.2458, |
|
"eval_rouge2": 0.2194, |
|
"eval_rougeL": 0.2456, |
|
"eval_rougeLsum": 0.2455, |
|
"eval_runtime": 5.9457, |
|
"eval_samples_per_second": 18.501, |
|
"eval_steps_per_second": 3.196, |
|
"step": 4234 |
|
}, |
|
{ |
|
"epoch": 232.99, |
|
"eval_gen_len": 18.6182, |
|
"eval_loss": 0.22396238148212433, |
|
"eval_rouge1": 0.2482, |
|
"eval_rouge2": 0.2227, |
|
"eval_rougeL": 0.2481, |
|
"eval_rougeLsum": 0.248, |
|
"eval_runtime": 5.9411, |
|
"eval_samples_per_second": 18.515, |
|
"eval_steps_per_second": 3.198, |
|
"step": 4252 |
|
}, |
|
{ |
|
"epoch": 233.97, |
|
"eval_gen_len": 18.6182, |
|
"eval_loss": 0.22270821034908295, |
|
"eval_rouge1": 0.2442, |
|
"eval_rouge2": 0.2178, |
|
"eval_rougeL": 0.2438, |
|
"eval_rougeLsum": 0.2435, |
|
"eval_runtime": 5.9443, |
|
"eval_samples_per_second": 18.505, |
|
"eval_steps_per_second": 3.196, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 234.96, |
|
"eval_gen_len": 18.6182, |
|
"eval_loss": 0.222365602850914, |
|
"eval_rouge1": 0.2491, |
|
"eval_rouge2": 0.2241, |
|
"eval_rougeL": 0.2487, |
|
"eval_rougeLsum": 0.2488, |
|
"eval_runtime": 5.9399, |
|
"eval_samples_per_second": 18.519, |
|
"eval_steps_per_second": 3.199, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"eval_gen_len": 18.6182, |
|
"eval_loss": 0.22221778333187103, |
|
"eval_rouge1": 0.2486, |
|
"eval_rouge2": 0.2233, |
|
"eval_rougeL": 0.2484, |
|
"eval_rougeLsum": 0.2484, |
|
"eval_runtime": 5.9418, |
|
"eval_samples_per_second": 18.513, |
|
"eval_steps_per_second": 3.198, |
|
"step": 4307 |
|
}, |
|
{ |
|
"epoch": 236.99, |
|
"eval_gen_len": 18.2727, |
|
"eval_loss": 0.2206820845603943, |
|
"eval_rouge1": 0.2443, |
|
"eval_rouge2": 0.219, |
|
"eval_rougeL": 0.2434, |
|
"eval_rougeLsum": 0.2434, |
|
"eval_runtime": 5.9282, |
|
"eval_samples_per_second": 18.555, |
|
"eval_steps_per_second": 3.205, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 237.97, |
|
"eval_gen_len": 17.4091, |
|
"eval_loss": 0.22046072781085968, |
|
"eval_rouge1": 0.2327, |
|
"eval_rouge2": 0.2091, |
|
"eval_rougeL": 0.2321, |
|
"eval_rougeLsum": 0.2325, |
|
"eval_runtime": 5.9336, |
|
"eval_samples_per_second": 18.539, |
|
"eval_steps_per_second": 3.202, |
|
"step": 4343 |
|
}, |
|
{ |
|
"epoch": 238.96, |
|
"eval_gen_len": 15.1636, |
|
"eval_loss": 0.21863390505313873, |
|
"eval_rouge1": 0.1989, |
|
"eval_rouge2": 0.1774, |
|
"eval_rougeL": 0.1978, |
|
"eval_rougeLsum": 0.1979, |
|
"eval_runtime": 5.9311, |
|
"eval_samples_per_second": 18.546, |
|
"eval_steps_per_second": 3.203, |
|
"step": 4361 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_gen_len": 16.1909, |
|
"eval_loss": 0.2192383110523224, |
|
"eval_rouge1": 0.2148, |
|
"eval_rouge2": 0.1923, |
|
"eval_rougeL": 0.2137, |
|
"eval_rougeLsum": 0.2144, |
|
"eval_runtime": 5.9365, |
|
"eval_samples_per_second": 18.529, |
|
"eval_steps_per_second": 3.201, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 240.99, |
|
"eval_gen_len": 13.0909, |
|
"eval_loss": 0.2176760584115982, |
|
"eval_rouge1": 0.1742, |
|
"eval_rouge2": 0.1548, |
|
"eval_rougeL": 0.1725, |
|
"eval_rougeLsum": 0.1723, |
|
"eval_runtime": 5.9161, |
|
"eval_samples_per_second": 18.593, |
|
"eval_steps_per_second": 3.212, |
|
"step": 4398 |
|
}, |
|
{ |
|
"epoch": 241.97, |
|
"eval_gen_len": 11.7091, |
|
"eval_loss": 0.21764642000198364, |
|
"eval_rouge1": 0.1541, |
|
"eval_rouge2": 0.1376, |
|
"eval_rougeL": 0.1529, |
|
"eval_rougeLsum": 0.1537, |
|
"eval_runtime": 5.9162, |
|
"eval_samples_per_second": 18.593, |
|
"eval_steps_per_second": 3.212, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 242.96, |
|
"eval_gen_len": 12.9, |
|
"eval_loss": 0.21736116707324982, |
|
"eval_rouge1": 0.1671, |
|
"eval_rouge2": 0.1495, |
|
"eval_rougeL": 0.1661, |
|
"eval_rougeLsum": 0.1671, |
|
"eval_runtime": 5.9221, |
|
"eval_samples_per_second": 18.574, |
|
"eval_steps_per_second": 3.208, |
|
"step": 4434 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"eval_gen_len": 10.4818, |
|
"eval_loss": 0.21571263670921326, |
|
"eval_rouge1": 0.1364, |
|
"eval_rouge2": 0.1201, |
|
"eval_rougeL": 0.1354, |
|
"eval_rougeLsum": 0.135, |
|
"eval_runtime": 5.9422, |
|
"eval_samples_per_second": 18.512, |
|
"eval_steps_per_second": 3.197, |
|
"step": 4453 |
|
}, |
|
{ |
|
"epoch": 244.99, |
|
"eval_gen_len": 8.9273, |
|
"eval_loss": 0.21506664156913757, |
|
"eval_rouge1": 0.1149, |
|
"eval_rouge2": 0.101, |
|
"eval_rougeL": 0.1133, |
|
"eval_rougeLsum": 0.1136, |
|
"eval_runtime": 5.9071, |
|
"eval_samples_per_second": 18.622, |
|
"eval_steps_per_second": 3.216, |
|
"step": 4471 |
|
}, |
|
{ |
|
"epoch": 245.97, |
|
"eval_gen_len": 7.7182, |
|
"eval_loss": 0.21382498741149902, |
|
"eval_rouge1": 0.0989, |
|
"eval_rouge2": 0.0866, |
|
"eval_rougeL": 0.0976, |
|
"eval_rougeLsum": 0.0981, |
|
"eval_runtime": 5.9086, |
|
"eval_samples_per_second": 18.617, |
|
"eval_steps_per_second": 3.216, |
|
"step": 4489 |
|
}, |
|
{ |
|
"epoch": 246.58, |
|
"grad_norm": 0.4568144977092743, |
|
"learning_rate": 1.0013333333333335e-05, |
|
"loss": 0.2977, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 246.96, |
|
"eval_gen_len": 7.3727, |
|
"eval_loss": 0.21434056758880615, |
|
"eval_rouge1": 0.0942, |
|
"eval_rouge2": 0.0823, |
|
"eval_rougeL": 0.093, |
|
"eval_rougeLsum": 0.0937, |
|
"eval_runtime": 5.9102, |
|
"eval_samples_per_second": 18.612, |
|
"eval_steps_per_second": 3.215, |
|
"step": 4507 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"eval_gen_len": 6.8636, |
|
"eval_loss": 0.2125895619392395, |
|
"eval_rouge1": 0.0884, |
|
"eval_rouge2": 0.0777, |
|
"eval_rougeL": 0.0876, |
|
"eval_rougeLsum": 0.0884, |
|
"eval_runtime": 5.9078, |
|
"eval_samples_per_second": 18.619, |
|
"eval_steps_per_second": 3.216, |
|
"step": 4526 |
|
}, |
|
{ |
|
"epoch": 248.99, |
|
"eval_gen_len": 6.6909, |
|
"eval_loss": 0.21264444291591644, |
|
"eval_rouge1": 0.0856, |
|
"eval_rouge2": 0.0753, |
|
"eval_rougeL": 0.0851, |
|
"eval_rougeLsum": 0.0853, |
|
"eval_runtime": 5.9128, |
|
"eval_samples_per_second": 18.604, |
|
"eval_steps_per_second": 3.213, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 249.97, |
|
"eval_gen_len": 6.8636, |
|
"eval_loss": 0.2110782116651535, |
|
"eval_rouge1": 0.0871, |
|
"eval_rouge2": 0.0764, |
|
"eval_rougeL": 0.0865, |
|
"eval_rougeLsum": 0.0866, |
|
"eval_runtime": 5.911, |
|
"eval_samples_per_second": 18.609, |
|
"eval_steps_per_second": 3.214, |
|
"step": 4562 |
|
}, |
|
{ |
|
"epoch": 250.96, |
|
"eval_gen_len": 6.3455, |
|
"eval_loss": 0.2114747315645218, |
|
"eval_rouge1": 0.0813, |
|
"eval_rouge2": 0.071, |
|
"eval_rougeL": 0.0805, |
|
"eval_rougeLsum": 0.0808, |
|
"eval_runtime": 5.9048, |
|
"eval_samples_per_second": 18.629, |
|
"eval_steps_per_second": 3.218, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"eval_gen_len": 5.1364, |
|
"eval_loss": 0.21088837087154388, |
|
"eval_rouge1": 0.0658, |
|
"eval_rouge2": 0.0587, |
|
"eval_rougeL": 0.0647, |
|
"eval_rougeLsum": 0.0656, |
|
"eval_runtime": 5.9122, |
|
"eval_samples_per_second": 18.606, |
|
"eval_steps_per_second": 3.214, |
|
"step": 4599 |
|
}, |
|
{ |
|
"epoch": 252.99, |
|
"eval_gen_len": 4.1, |
|
"eval_loss": 0.21026724576950073, |
|
"eval_rouge1": 0.0525, |
|
"eval_rouge2": 0.0474, |
|
"eval_rougeL": 0.0523, |
|
"eval_rougeLsum": 0.0531, |
|
"eval_runtime": 5.8924, |
|
"eval_samples_per_second": 18.668, |
|
"eval_steps_per_second": 3.225, |
|
"step": 4617 |
|
}, |
|
{ |
|
"epoch": 253.97, |
|
"eval_gen_len": 3.4091, |
|
"eval_loss": 0.20923300087451935, |
|
"eval_rouge1": 0.0422, |
|
"eval_rouge2": 0.0371, |
|
"eval_rougeL": 0.0417, |
|
"eval_rougeLsum": 0.0427, |
|
"eval_runtime": 5.9069, |
|
"eval_samples_per_second": 18.622, |
|
"eval_steps_per_second": 3.217, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 254.96, |
|
"eval_gen_len": 3.4091, |
|
"eval_loss": 0.20883551239967346, |
|
"eval_rouge1": 0.0425, |
|
"eval_rouge2": 0.0373, |
|
"eval_rougeL": 0.0421, |
|
"eval_rougeLsum": 0.0432, |
|
"eval_runtime": 5.8899, |
|
"eval_samples_per_second": 18.676, |
|
"eval_steps_per_second": 3.226, |
|
"step": 4653 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"eval_gen_len": 3.0545, |
|
"eval_loss": 0.20799440145492554, |
|
"eval_rouge1": 0.0383, |
|
"eval_rouge2": 0.0339, |
|
"eval_rougeL": 0.0381, |
|
"eval_rougeLsum": 0.0386, |
|
"eval_runtime": 5.8948, |
|
"eval_samples_per_second": 18.66, |
|
"eval_steps_per_second": 3.223, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 256.99, |
|
"eval_gen_len": 2.0364, |
|
"eval_loss": 0.20746435225009918, |
|
"eval_rouge1": 0.0256, |
|
"eval_rouge2": 0.0228, |
|
"eval_rougeL": 0.0255, |
|
"eval_rougeLsum": 0.0259, |
|
"eval_runtime": 5.8826, |
|
"eval_samples_per_second": 18.699, |
|
"eval_steps_per_second": 3.23, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 257.97, |
|
"eval_gen_len": 2.0364, |
|
"eval_loss": 0.2079101800918579, |
|
"eval_rouge1": 0.026, |
|
"eval_rouge2": 0.0231, |
|
"eval_rougeL": 0.0258, |
|
"eval_rougeLsum": 0.0263, |
|
"eval_runtime": 5.9214, |
|
"eval_samples_per_second": 18.577, |
|
"eval_steps_per_second": 3.209, |
|
"step": 4708 |
|
}, |
|
{ |
|
"epoch": 258.96, |
|
"eval_gen_len": 2.0182, |
|
"eval_loss": 0.20639775693416595, |
|
"eval_rouge1": 0.0232, |
|
"eval_rouge2": 0.0203, |
|
"eval_rougeL": 0.0231, |
|
"eval_rougeLsum": 0.0233, |
|
"eval_runtime": 5.9298, |
|
"eval_samples_per_second": 18.55, |
|
"eval_steps_per_second": 3.204, |
|
"step": 4726 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"eval_gen_len": 2.0182, |
|
"eval_loss": 0.20615024864673615, |
|
"eval_rouge1": 0.0238, |
|
"eval_rouge2": 0.0202, |
|
"eval_rougeL": 0.0237, |
|
"eval_rougeLsum": 0.0238, |
|
"eval_runtime": 5.9324, |
|
"eval_samples_per_second": 18.542, |
|
"eval_steps_per_second": 3.203, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 260.99, |
|
"eval_gen_len": 1.7091, |
|
"eval_loss": 0.20586760342121124, |
|
"eval_rouge1": 0.02, |
|
"eval_rouge2": 0.0175, |
|
"eval_rougeL": 0.0202, |
|
"eval_rougeLsum": 0.0201, |
|
"eval_runtime": 5.8796, |
|
"eval_samples_per_second": 18.709, |
|
"eval_steps_per_second": 3.231, |
|
"step": 4763 |
|
}, |
|
{ |
|
"epoch": 261.97, |
|
"eval_gen_len": 0.6909, |
|
"eval_loss": 0.20486456155776978, |
|
"eval_rouge1": 0.0091, |
|
"eval_rouge2": 0.0079, |
|
"eval_rougeL": 0.0091, |
|
"eval_rougeLsum": 0.0091, |
|
"eval_runtime": 5.8759, |
|
"eval_samples_per_second": 18.721, |
|
"eval_steps_per_second": 3.234, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 262.96, |
|
"eval_gen_len": 0.6909, |
|
"eval_loss": 0.20466168224811554, |
|
"eval_rouge1": 0.0091, |
|
"eval_rouge2": 0.0079, |
|
"eval_rougeL": 0.0091, |
|
"eval_rougeLsum": 0.0091, |
|
"eval_runtime": 5.9163, |
|
"eval_samples_per_second": 18.593, |
|
"eval_steps_per_second": 3.211, |
|
"step": 4799 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"eval_gen_len": 0.6818, |
|
"eval_loss": 0.20416179299354553, |
|
"eval_rouge1": 0.0082, |
|
"eval_rouge2": 0.0071, |
|
"eval_rougeL": 0.0081, |
|
"eval_rougeLsum": 0.0082, |
|
"eval_runtime": 5.8989, |
|
"eval_samples_per_second": 18.647, |
|
"eval_steps_per_second": 3.221, |
|
"step": 4818 |
|
}, |
|
{ |
|
"epoch": 264.99, |
|
"eval_gen_len": 0.3364, |
|
"eval_loss": 0.20311486721038818, |
|
"eval_rouge1": 0.0044, |
|
"eval_rouge2": 0.0038, |
|
"eval_rougeL": 0.0044, |
|
"eval_rougeLsum": 0.0046, |
|
"eval_runtime": 5.8639, |
|
"eval_samples_per_second": 18.759, |
|
"eval_steps_per_second": 3.24, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 265.97, |
|
"eval_gen_len": 0.3455, |
|
"eval_loss": 0.20284703373908997, |
|
"eval_rouge1": 0.0057, |
|
"eval_rouge2": 0.0051, |
|
"eval_rougeL": 0.0057, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 5.874, |
|
"eval_samples_per_second": 18.726, |
|
"eval_steps_per_second": 3.235, |
|
"step": 4854 |
|
}, |
|
{ |
|
"epoch": 266.96, |
|
"eval_gen_len": 0.3455, |
|
"eval_loss": 0.20207703113555908, |
|
"eval_rouge1": 0.0057, |
|
"eval_rouge2": 0.0051, |
|
"eval_rougeL": 0.0057, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 5.871, |
|
"eval_samples_per_second": 18.736, |
|
"eval_steps_per_second": 3.236, |
|
"step": 4872 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"eval_gen_len": 0.3455, |
|
"eval_loss": 0.20189516246318817, |
|
"eval_rouge1": 0.0057, |
|
"eval_rouge2": 0.0051, |
|
"eval_rougeL": 0.0057, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 5.8784, |
|
"eval_samples_per_second": 18.713, |
|
"eval_steps_per_second": 3.232, |
|
"step": 4891 |
|
}, |
|
{ |
|
"epoch": 268.99, |
|
"eval_gen_len": 0.3455, |
|
"eval_loss": 0.20159202814102173, |
|
"eval_rouge1": 0.0057, |
|
"eval_rouge2": 0.0051, |
|
"eval_rougeL": 0.0057, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 5.8675, |
|
"eval_samples_per_second": 18.747, |
|
"eval_steps_per_second": 3.238, |
|
"step": 4909 |
|
}, |
|
{ |
|
"epoch": 269.97, |
|
"eval_gen_len": 0.3455, |
|
"eval_loss": 0.20119339227676392, |
|
"eval_rouge1": 0.0057, |
|
"eval_rouge2": 0.0051, |
|
"eval_rougeL": 0.0057, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 5.8693, |
|
"eval_samples_per_second": 18.741, |
|
"eval_steps_per_second": 3.237, |
|
"step": 4927 |
|
}, |
|
{ |
|
"epoch": 270.96, |
|
"eval_gen_len": 0.3455, |
|
"eval_loss": 0.20063255727291107, |
|
"eval_rouge1": 0.0057, |
|
"eval_rouge2": 0.0051, |
|
"eval_rougeL": 0.0057, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 5.8634, |
|
"eval_samples_per_second": 18.76, |
|
"eval_steps_per_second": 3.24, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"eval_gen_len": 0.3455, |
|
"eval_loss": 0.20042632520198822, |
|
"eval_rouge1": 0.0057, |
|
"eval_rouge2": 0.0051, |
|
"eval_rougeL": 0.0057, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 5.9002, |
|
"eval_samples_per_second": 18.644, |
|
"eval_steps_per_second": 3.22, |
|
"step": 4964 |
|
}, |
|
{ |
|
"epoch": 272.99, |
|
"eval_gen_len": 0.3455, |
|
"eval_loss": 0.19937776029109955, |
|
"eval_rouge1": 0.0057, |
|
"eval_rouge2": 0.0051, |
|
"eval_rougeL": 0.0057, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 5.866, |
|
"eval_samples_per_second": 18.752, |
|
"eval_steps_per_second": 3.239, |
|
"step": 4982 |
|
}, |
|
{ |
|
"epoch": 273.97, |
|
"grad_norm": 0.47718000411987305, |
|
"learning_rate": 8.902222222222224e-06, |
|
"loss": 0.2666, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 273.97, |
|
"eval_gen_len": 0.5182, |
|
"eval_loss": 0.199092298746109, |
|
"eval_rouge1": 0.008, |
|
"eval_rouge2": 0.0071, |
|
"eval_rougeL": 0.008, |
|
"eval_rougeLsum": 0.008, |
|
"eval_runtime": 5.8818, |
|
"eval_samples_per_second": 18.702, |
|
"eval_steps_per_second": 3.23, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 274.96, |
|
"eval_gen_len": 0.5182, |
|
"eval_loss": 0.1990521252155304, |
|
"eval_rouge1": 0.008, |
|
"eval_rouge2": 0.0071, |
|
"eval_rougeL": 0.008, |
|
"eval_rougeLsum": 0.008, |
|
"eval_runtime": 5.8794, |
|
"eval_samples_per_second": 18.709, |
|
"eval_steps_per_second": 3.232, |
|
"step": 5018 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"eval_gen_len": 0.5182, |
|
"eval_loss": 0.19846394658088684, |
|
"eval_rouge1": 0.008, |
|
"eval_rouge2": 0.0071, |
|
"eval_rougeL": 0.008, |
|
"eval_rougeLsum": 0.008, |
|
"eval_runtime": 5.8793, |
|
"eval_samples_per_second": 18.71, |
|
"eval_steps_per_second": 3.232, |
|
"step": 5037 |
|
}, |
|
{ |
|
"epoch": 276.99, |
|
"eval_gen_len": 0.5182, |
|
"eval_loss": 0.1975831538438797, |
|
"eval_rouge1": 0.008, |
|
"eval_rouge2": 0.0071, |
|
"eval_rougeL": 0.008, |
|
"eval_rougeLsum": 0.008, |
|
"eval_runtime": 5.8684, |
|
"eval_samples_per_second": 18.744, |
|
"eval_steps_per_second": 3.238, |
|
"step": 5055 |
|
}, |
|
{ |
|
"epoch": 277.97, |
|
"eval_gen_len": 0.1727, |
|
"eval_loss": 0.19685131311416626, |
|
"eval_rouge1": 0.0025, |
|
"eval_rouge2": 0.0024, |
|
"eval_rougeL": 0.0025, |
|
"eval_rougeLsum": 0.0025, |
|
"eval_runtime": 5.8685, |
|
"eval_samples_per_second": 18.744, |
|
"eval_steps_per_second": 3.238, |
|
"step": 5073 |
|
}, |
|
{ |
|
"epoch": 278.96, |
|
"eval_gen_len": 0.1727, |
|
"eval_loss": 0.19685351848602295, |
|
"eval_rouge1": 0.0025, |
|
"eval_rouge2": 0.0024, |
|
"eval_rougeL": 0.0025, |
|
"eval_rougeLsum": 0.0025, |
|
"eval_runtime": 5.87, |
|
"eval_samples_per_second": 18.739, |
|
"eval_steps_per_second": 3.237, |
|
"step": 5091 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"eval_gen_len": 0.3455, |
|
"eval_loss": 0.19706358015537262, |
|
"eval_rouge1": 0.0057, |
|
"eval_rouge2": 0.0051, |
|
"eval_rougeL": 0.0057, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 5.8807, |
|
"eval_samples_per_second": 18.705, |
|
"eval_steps_per_second": 3.231, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 280.99, |
|
"eval_gen_len": 0.1727, |
|
"eval_loss": 0.19580155611038208, |
|
"eval_rouge1": 0.0025, |
|
"eval_rouge2": 0.0024, |
|
"eval_rougeL": 0.0025, |
|
"eval_rougeLsum": 0.0025, |
|
"eval_runtime": 5.8696, |
|
"eval_samples_per_second": 18.741, |
|
"eval_steps_per_second": 3.237, |
|
"step": 5128 |
|
}, |
|
{ |
|
"epoch": 281.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1954393833875656, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8668, |
|
"eval_samples_per_second": 18.75, |
|
"eval_steps_per_second": 3.239, |
|
"step": 5146 |
|
}, |
|
{ |
|
"epoch": 282.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.19552475214004517, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8798, |
|
"eval_samples_per_second": 18.708, |
|
"eval_steps_per_second": 3.231, |
|
"step": 5164 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"eval_gen_len": 0.1727, |
|
"eval_loss": 0.1950557678937912, |
|
"eval_rouge1": 0.0025, |
|
"eval_rouge2": 0.0024, |
|
"eval_rougeL": 0.0025, |
|
"eval_rougeLsum": 0.0025, |
|
"eval_runtime": 5.8764, |
|
"eval_samples_per_second": 18.719, |
|
"eval_steps_per_second": 3.233, |
|
"step": 5183 |
|
}, |
|
{ |
|
"epoch": 284.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1939947009086609, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8768, |
|
"eval_samples_per_second": 18.718, |
|
"eval_steps_per_second": 3.233, |
|
"step": 5201 |
|
}, |
|
{ |
|
"epoch": 285.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.19393964111804962, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8764, |
|
"eval_samples_per_second": 18.719, |
|
"eval_steps_per_second": 3.233, |
|
"step": 5219 |
|
}, |
|
{ |
|
"epoch": 286.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.19383706152439117, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8701, |
|
"eval_samples_per_second": 18.739, |
|
"eval_steps_per_second": 3.237, |
|
"step": 5237 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1930641084909439, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8828, |
|
"eval_samples_per_second": 18.699, |
|
"eval_steps_per_second": 3.23, |
|
"step": 5256 |
|
}, |
|
{ |
|
"epoch": 288.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.19216330349445343, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8721, |
|
"eval_samples_per_second": 18.733, |
|
"eval_steps_per_second": 3.236, |
|
"step": 5274 |
|
}, |
|
{ |
|
"epoch": 289.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1919858455657959, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.886, |
|
"eval_samples_per_second": 18.688, |
|
"eval_steps_per_second": 3.228, |
|
"step": 5292 |
|
}, |
|
{ |
|
"epoch": 290.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.19181759655475616, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8804, |
|
"eval_samples_per_second": 18.706, |
|
"eval_steps_per_second": 3.231, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1913154274225235, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8887, |
|
"eval_samples_per_second": 18.68, |
|
"eval_steps_per_second": 3.227, |
|
"step": 5329 |
|
}, |
|
{ |
|
"epoch": 292.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.19096632301807404, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8809, |
|
"eval_samples_per_second": 18.704, |
|
"eval_steps_per_second": 3.231, |
|
"step": 5347 |
|
}, |
|
{ |
|
"epoch": 293.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.19034302234649658, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.886, |
|
"eval_samples_per_second": 18.688, |
|
"eval_steps_per_second": 3.228, |
|
"step": 5365 |
|
}, |
|
{ |
|
"epoch": 294.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18984819948673248, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8905, |
|
"eval_samples_per_second": 18.674, |
|
"eval_steps_per_second": 3.226, |
|
"step": 5383 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1901622861623764, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8956, |
|
"eval_samples_per_second": 18.658, |
|
"eval_steps_per_second": 3.223, |
|
"step": 5402 |
|
}, |
|
{ |
|
"epoch": 296.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18923012912273407, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8738, |
|
"eval_samples_per_second": 18.727, |
|
"eval_steps_per_second": 3.235, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 297.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18847127258777618, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8804, |
|
"eval_samples_per_second": 18.706, |
|
"eval_steps_per_second": 3.231, |
|
"step": 5438 |
|
}, |
|
{ |
|
"epoch": 298.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1884116530418396, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8771, |
|
"eval_samples_per_second": 18.717, |
|
"eval_steps_per_second": 3.233, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18776828050613403, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8882, |
|
"eval_samples_per_second": 18.681, |
|
"eval_steps_per_second": 3.227, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 300.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18751147389411926, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8894, |
|
"eval_samples_per_second": 18.677, |
|
"eval_steps_per_second": 3.226, |
|
"step": 5493 |
|
}, |
|
{ |
|
"epoch": 301.37, |
|
"grad_norm": 0.6001901030540466, |
|
"learning_rate": 7.791111111111111e-06, |
|
"loss": 0.2463, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 301.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18754757940769196, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8862, |
|
"eval_samples_per_second": 18.688, |
|
"eval_steps_per_second": 3.228, |
|
"step": 5511 |
|
}, |
|
{ |
|
"epoch": 302.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18714185059070587, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8804, |
|
"eval_samples_per_second": 18.706, |
|
"eval_steps_per_second": 3.231, |
|
"step": 5529 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18668492138385773, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8924, |
|
"eval_samples_per_second": 18.668, |
|
"eval_steps_per_second": 3.225, |
|
"step": 5548 |
|
}, |
|
{ |
|
"epoch": 304.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18612505495548248, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8787, |
|
"eval_samples_per_second": 18.712, |
|
"eval_steps_per_second": 3.232, |
|
"step": 5566 |
|
}, |
|
{ |
|
"epoch": 305.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18622124195098877, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8963, |
|
"eval_samples_per_second": 18.656, |
|
"eval_steps_per_second": 3.222, |
|
"step": 5584 |
|
}, |
|
{ |
|
"epoch": 306.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18581855297088623, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8851, |
|
"eval_samples_per_second": 18.691, |
|
"eval_steps_per_second": 3.228, |
|
"step": 5602 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1851491779088974, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8834, |
|
"eval_samples_per_second": 18.697, |
|
"eval_steps_per_second": 3.229, |
|
"step": 5621 |
|
}, |
|
{ |
|
"epoch": 308.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18541742861270905, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8802, |
|
"eval_samples_per_second": 18.707, |
|
"eval_steps_per_second": 3.231, |
|
"step": 5639 |
|
}, |
|
{ |
|
"epoch": 309.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1846253126859665, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.88, |
|
"eval_samples_per_second": 18.708, |
|
"eval_steps_per_second": 3.231, |
|
"step": 5657 |
|
}, |
|
{ |
|
"epoch": 310.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1842205971479416, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8908, |
|
"eval_samples_per_second": 18.673, |
|
"eval_steps_per_second": 3.225, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1838139295578003, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8826, |
|
"eval_samples_per_second": 18.699, |
|
"eval_steps_per_second": 3.23, |
|
"step": 5694 |
|
}, |
|
{ |
|
"epoch": 312.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18346160650253296, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8686, |
|
"eval_samples_per_second": 18.744, |
|
"eval_steps_per_second": 3.238, |
|
"step": 5712 |
|
}, |
|
{ |
|
"epoch": 313.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18300552666187286, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8811, |
|
"eval_samples_per_second": 18.704, |
|
"eval_steps_per_second": 3.231, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 314.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18304497003555298, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8736, |
|
"eval_samples_per_second": 18.728, |
|
"eval_steps_per_second": 3.235, |
|
"step": 5748 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1824423372745514, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9117, |
|
"eval_samples_per_second": 18.607, |
|
"eval_steps_per_second": 3.214, |
|
"step": 5767 |
|
}, |
|
{ |
|
"epoch": 316.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18214993178844452, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8772, |
|
"eval_samples_per_second": 18.716, |
|
"eval_steps_per_second": 3.233, |
|
"step": 5785 |
|
}, |
|
{ |
|
"epoch": 317.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1819440871477127, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8707, |
|
"eval_samples_per_second": 18.737, |
|
"eval_steps_per_second": 3.236, |
|
"step": 5803 |
|
}, |
|
{ |
|
"epoch": 318.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18124094605445862, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9162, |
|
"eval_samples_per_second": 18.593, |
|
"eval_steps_per_second": 3.212, |
|
"step": 5821 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.18139633536338806, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.874, |
|
"eval_samples_per_second": 18.726, |
|
"eval_steps_per_second": 3.235, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 320.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1810206174850464, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8919, |
|
"eval_samples_per_second": 18.67, |
|
"eval_steps_per_second": 3.225, |
|
"step": 5858 |
|
}, |
|
{ |
|
"epoch": 321.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1808764487504959, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9173, |
|
"eval_samples_per_second": 18.589, |
|
"eval_steps_per_second": 3.211, |
|
"step": 5876 |
|
}, |
|
{ |
|
"epoch": 322.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1801535189151764, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9176, |
|
"eval_samples_per_second": 18.589, |
|
"eval_steps_per_second": 3.211, |
|
"step": 5894 |
|
}, |
|
{ |
|
"epoch": 324.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1799170821905136, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9289, |
|
"eval_samples_per_second": 18.553, |
|
"eval_steps_per_second": 3.205, |
|
"step": 5913 |
|
}, |
|
{ |
|
"epoch": 324.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1797485649585724, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9221, |
|
"eval_samples_per_second": 18.575, |
|
"eval_steps_per_second": 3.208, |
|
"step": 5931 |
|
}, |
|
{ |
|
"epoch": 325.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17966806888580322, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9096, |
|
"eval_samples_per_second": 18.614, |
|
"eval_steps_per_second": 3.215, |
|
"step": 5949 |
|
}, |
|
{ |
|
"epoch": 326.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17945848405361176, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8715, |
|
"eval_samples_per_second": 18.735, |
|
"eval_steps_per_second": 3.236, |
|
"step": 5967 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17859123647212982, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8842, |
|
"eval_samples_per_second": 18.694, |
|
"eval_steps_per_second": 3.229, |
|
"step": 5986 |
|
}, |
|
{ |
|
"epoch": 328.77, |
|
"grad_norm": 0.45411407947540283, |
|
"learning_rate": 6.680000000000001e-06, |
|
"loss": 0.2311, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 328.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17845258116722107, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8879, |
|
"eval_samples_per_second": 18.682, |
|
"eval_steps_per_second": 3.227, |
|
"step": 6004 |
|
}, |
|
{ |
|
"epoch": 329.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17820703983306885, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8859, |
|
"eval_samples_per_second": 18.689, |
|
"eval_steps_per_second": 3.228, |
|
"step": 6022 |
|
}, |
|
{ |
|
"epoch": 330.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17832966148853302, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8753, |
|
"eval_samples_per_second": 18.723, |
|
"eval_steps_per_second": 3.234, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 332.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17780448496341705, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8819, |
|
"eval_samples_per_second": 18.702, |
|
"eval_steps_per_second": 3.23, |
|
"step": 6059 |
|
}, |
|
{ |
|
"epoch": 332.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17747904360294342, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8701, |
|
"eval_samples_per_second": 18.739, |
|
"eval_steps_per_second": 3.237, |
|
"step": 6077 |
|
}, |
|
{ |
|
"epoch": 333.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17767922580242157, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8742, |
|
"eval_samples_per_second": 18.726, |
|
"eval_steps_per_second": 3.234, |
|
"step": 6095 |
|
}, |
|
{ |
|
"epoch": 334.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1771049201488495, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9091, |
|
"eval_samples_per_second": 18.615, |
|
"eval_steps_per_second": 3.215, |
|
"step": 6113 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1770164966583252, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8812, |
|
"eval_samples_per_second": 18.704, |
|
"eval_steps_per_second": 3.231, |
|
"step": 6132 |
|
}, |
|
{ |
|
"epoch": 336.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17682689428329468, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8773, |
|
"eval_samples_per_second": 18.716, |
|
"eval_steps_per_second": 3.233, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 337.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1767437607049942, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8745, |
|
"eval_samples_per_second": 18.725, |
|
"eval_steps_per_second": 3.234, |
|
"step": 6168 |
|
}, |
|
{ |
|
"epoch": 338.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17660827934741974, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.876, |
|
"eval_samples_per_second": 18.72, |
|
"eval_steps_per_second": 3.233, |
|
"step": 6186 |
|
}, |
|
{ |
|
"epoch": 340.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1762937307357788, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8747, |
|
"eval_samples_per_second": 18.724, |
|
"eval_steps_per_second": 3.234, |
|
"step": 6205 |
|
}, |
|
{ |
|
"epoch": 340.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17532125115394592, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.877, |
|
"eval_samples_per_second": 18.717, |
|
"eval_steps_per_second": 3.233, |
|
"step": 6223 |
|
}, |
|
{ |
|
"epoch": 341.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17553770542144775, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8818, |
|
"eval_samples_per_second": 18.702, |
|
"eval_steps_per_second": 3.23, |
|
"step": 6241 |
|
}, |
|
{ |
|
"epoch": 342.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17563851177692413, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8839, |
|
"eval_samples_per_second": 18.695, |
|
"eval_steps_per_second": 3.229, |
|
"step": 6259 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17437517642974854, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8841, |
|
"eval_samples_per_second": 18.694, |
|
"eval_steps_per_second": 3.229, |
|
"step": 6278 |
|
}, |
|
{ |
|
"epoch": 344.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17455054819583893, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8746, |
|
"eval_samples_per_second": 18.725, |
|
"eval_steps_per_second": 3.234, |
|
"step": 6296 |
|
}, |
|
{ |
|
"epoch": 345.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17481422424316406, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8787, |
|
"eval_samples_per_second": 18.712, |
|
"eval_steps_per_second": 3.232, |
|
"step": 6314 |
|
}, |
|
{ |
|
"epoch": 346.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1744970828294754, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8781, |
|
"eval_samples_per_second": 18.713, |
|
"eval_steps_per_second": 3.232, |
|
"step": 6332 |
|
}, |
|
{ |
|
"epoch": 348.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17412132024765015, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8833, |
|
"eval_samples_per_second": 18.697, |
|
"eval_steps_per_second": 3.229, |
|
"step": 6351 |
|
}, |
|
{ |
|
"epoch": 348.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1738380789756775, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9075, |
|
"eval_samples_per_second": 18.621, |
|
"eval_steps_per_second": 3.216, |
|
"step": 6369 |
|
}, |
|
{ |
|
"epoch": 349.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17379747331142426, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.876, |
|
"eval_samples_per_second": 18.72, |
|
"eval_steps_per_second": 3.234, |
|
"step": 6387 |
|
}, |
|
{ |
|
"epoch": 350.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17336434125900269, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8785, |
|
"eval_samples_per_second": 18.712, |
|
"eval_steps_per_second": 3.232, |
|
"step": 6405 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17307358980178833, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9197, |
|
"eval_samples_per_second": 18.582, |
|
"eval_steps_per_second": 3.21, |
|
"step": 6424 |
|
}, |
|
{ |
|
"epoch": 352.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17290958762168884, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8928, |
|
"eval_samples_per_second": 18.667, |
|
"eval_steps_per_second": 3.224, |
|
"step": 6442 |
|
}, |
|
{ |
|
"epoch": 353.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1726769059896469, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8785, |
|
"eval_samples_per_second": 18.712, |
|
"eval_steps_per_second": 3.232, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 354.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17268246412277222, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8721, |
|
"eval_samples_per_second": 18.733, |
|
"eval_steps_per_second": 3.236, |
|
"step": 6478 |
|
}, |
|
{ |
|
"epoch": 356.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17257879674434662, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8803, |
|
"eval_samples_per_second": 18.706, |
|
"eval_steps_per_second": 3.231, |
|
"step": 6497 |
|
}, |
|
{ |
|
"epoch": 356.16, |
|
"grad_norm": 0.3692683279514313, |
|
"learning_rate": 5.56888888888889e-06, |
|
"loss": 0.2192, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 356.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17185170948505402, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8845, |
|
"eval_samples_per_second": 18.693, |
|
"eval_steps_per_second": 3.229, |
|
"step": 6515 |
|
}, |
|
{ |
|
"epoch": 357.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17126674950122833, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8816, |
|
"eval_samples_per_second": 18.702, |
|
"eval_steps_per_second": 3.23, |
|
"step": 6533 |
|
}, |
|
{ |
|
"epoch": 358.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17136740684509277, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8802, |
|
"eval_samples_per_second": 18.707, |
|
"eval_steps_per_second": 3.231, |
|
"step": 6551 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1711340844631195, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8839, |
|
"eval_samples_per_second": 18.695, |
|
"eval_steps_per_second": 3.229, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 360.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1712319403886795, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8782, |
|
"eval_samples_per_second": 18.713, |
|
"eval_steps_per_second": 3.232, |
|
"step": 6588 |
|
}, |
|
{ |
|
"epoch": 361.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1710105836391449, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8829, |
|
"eval_samples_per_second": 18.698, |
|
"eval_steps_per_second": 3.23, |
|
"step": 6606 |
|
}, |
|
{ |
|
"epoch": 362.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17070402204990387, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9043, |
|
"eval_samples_per_second": 18.631, |
|
"eval_steps_per_second": 3.218, |
|
"step": 6624 |
|
}, |
|
{ |
|
"epoch": 364.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1703125238418579, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9174, |
|
"eval_samples_per_second": 18.589, |
|
"eval_steps_per_second": 3.211, |
|
"step": 6643 |
|
}, |
|
{ |
|
"epoch": 364.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.17009203135967255, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9165, |
|
"eval_samples_per_second": 18.592, |
|
"eval_steps_per_second": 3.211, |
|
"step": 6661 |
|
}, |
|
{ |
|
"epoch": 365.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1701081246137619, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8801, |
|
"eval_samples_per_second": 18.707, |
|
"eval_steps_per_second": 3.231, |
|
"step": 6679 |
|
}, |
|
{ |
|
"epoch": 366.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16997285187244415, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8816, |
|
"eval_samples_per_second": 18.703, |
|
"eval_steps_per_second": 3.23, |
|
"step": 6697 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16970933973789215, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8906, |
|
"eval_samples_per_second": 18.674, |
|
"eval_steps_per_second": 3.225, |
|
"step": 6716 |
|
}, |
|
{ |
|
"epoch": 368.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1696108877658844, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8896, |
|
"eval_samples_per_second": 18.677, |
|
"eval_steps_per_second": 3.226, |
|
"step": 6734 |
|
}, |
|
{ |
|
"epoch": 369.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1694546341896057, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9145, |
|
"eval_samples_per_second": 18.598, |
|
"eval_steps_per_second": 3.212, |
|
"step": 6752 |
|
}, |
|
{ |
|
"epoch": 370.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16927814483642578, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9179, |
|
"eval_samples_per_second": 18.588, |
|
"eval_steps_per_second": 3.211, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 372.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16911160945892334, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8804, |
|
"eval_samples_per_second": 18.706, |
|
"eval_steps_per_second": 3.231, |
|
"step": 6789 |
|
}, |
|
{ |
|
"epoch": 372.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16869549453258514, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8847, |
|
"eval_samples_per_second": 18.693, |
|
"eval_steps_per_second": 3.229, |
|
"step": 6807 |
|
}, |
|
{ |
|
"epoch": 373.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16840766370296478, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9184, |
|
"eval_samples_per_second": 18.586, |
|
"eval_steps_per_second": 3.21, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 374.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16836071014404297, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9124, |
|
"eval_samples_per_second": 18.605, |
|
"eval_steps_per_second": 3.214, |
|
"step": 6843 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.168260395526886, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8823, |
|
"eval_samples_per_second": 18.7, |
|
"eval_steps_per_second": 3.23, |
|
"step": 6862 |
|
}, |
|
{ |
|
"epoch": 376.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16808076202869415, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8791, |
|
"eval_samples_per_second": 18.71, |
|
"eval_steps_per_second": 3.232, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 377.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16791433095932007, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8871, |
|
"eval_samples_per_second": 18.685, |
|
"eval_steps_per_second": 3.227, |
|
"step": 6898 |
|
}, |
|
{ |
|
"epoch": 378.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16784635186195374, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8948, |
|
"eval_samples_per_second": 18.661, |
|
"eval_steps_per_second": 3.223, |
|
"step": 6916 |
|
}, |
|
{ |
|
"epoch": 380.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16753236949443817, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8837, |
|
"eval_samples_per_second": 18.696, |
|
"eval_steps_per_second": 3.229, |
|
"step": 6935 |
|
}, |
|
{ |
|
"epoch": 380.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16738325357437134, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8709, |
|
"eval_samples_per_second": 18.736, |
|
"eval_steps_per_second": 3.236, |
|
"step": 6953 |
|
}, |
|
{ |
|
"epoch": 381.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16733896732330322, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8784, |
|
"eval_samples_per_second": 18.713, |
|
"eval_steps_per_second": 3.232, |
|
"step": 6971 |
|
}, |
|
{ |
|
"epoch": 382.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16720926761627197, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8818, |
|
"eval_samples_per_second": 18.702, |
|
"eval_steps_per_second": 3.23, |
|
"step": 6989 |
|
}, |
|
{ |
|
"epoch": 383.56, |
|
"grad_norm": 0.32142043113708496, |
|
"learning_rate": 4.457777777777778e-06, |
|
"loss": 0.211, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16671238839626312, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8769, |
|
"eval_samples_per_second": 18.717, |
|
"eval_steps_per_second": 3.233, |
|
"step": 7008 |
|
}, |
|
{ |
|
"epoch": 384.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16662339866161346, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8678, |
|
"eval_samples_per_second": 18.746, |
|
"eval_steps_per_second": 3.238, |
|
"step": 7026 |
|
}, |
|
{ |
|
"epoch": 385.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16629952192306519, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8786, |
|
"eval_samples_per_second": 18.712, |
|
"eval_steps_per_second": 3.232, |
|
"step": 7044 |
|
}, |
|
{ |
|
"epoch": 386.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16644792258739471, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8767, |
|
"eval_samples_per_second": 18.718, |
|
"eval_steps_per_second": 3.233, |
|
"step": 7062 |
|
}, |
|
{ |
|
"epoch": 388.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1665712594985962, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8895, |
|
"eval_samples_per_second": 18.677, |
|
"eval_steps_per_second": 3.226, |
|
"step": 7081 |
|
}, |
|
{ |
|
"epoch": 388.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16607053577899933, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8751, |
|
"eval_samples_per_second": 18.723, |
|
"eval_steps_per_second": 3.234, |
|
"step": 7099 |
|
}, |
|
{ |
|
"epoch": 389.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16597412526607513, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8717, |
|
"eval_samples_per_second": 18.734, |
|
"eval_steps_per_second": 3.236, |
|
"step": 7117 |
|
}, |
|
{ |
|
"epoch": 390.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16590653359889984, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8807, |
|
"eval_samples_per_second": 18.705, |
|
"eval_steps_per_second": 3.231, |
|
"step": 7135 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16565540432929993, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8901, |
|
"eval_samples_per_second": 18.675, |
|
"eval_steps_per_second": 3.226, |
|
"step": 7154 |
|
}, |
|
{ |
|
"epoch": 392.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1656540036201477, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8795, |
|
"eval_samples_per_second": 18.709, |
|
"eval_steps_per_second": 3.232, |
|
"step": 7172 |
|
}, |
|
{ |
|
"epoch": 393.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16561686992645264, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.891, |
|
"eval_samples_per_second": 18.672, |
|
"eval_steps_per_second": 3.225, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 394.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16566209495067596, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8782, |
|
"eval_samples_per_second": 18.713, |
|
"eval_steps_per_second": 3.232, |
|
"step": 7208 |
|
}, |
|
{ |
|
"epoch": 396.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16528266668319702, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9003, |
|
"eval_samples_per_second": 18.643, |
|
"eval_steps_per_second": 3.22, |
|
"step": 7227 |
|
}, |
|
{ |
|
"epoch": 396.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16509102284908295, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8787, |
|
"eval_samples_per_second": 18.712, |
|
"eval_steps_per_second": 3.232, |
|
"step": 7245 |
|
}, |
|
{ |
|
"epoch": 397.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16473665833473206, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8817, |
|
"eval_samples_per_second": 18.702, |
|
"eval_steps_per_second": 3.23, |
|
"step": 7263 |
|
}, |
|
{ |
|
"epoch": 398.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16476485133171082, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8783, |
|
"eval_samples_per_second": 18.713, |
|
"eval_steps_per_second": 3.232, |
|
"step": 7281 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1648998111486435, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8847, |
|
"eval_samples_per_second": 18.692, |
|
"eval_steps_per_second": 3.229, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 400.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16450464725494385, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8833, |
|
"eval_samples_per_second": 18.697, |
|
"eval_steps_per_second": 3.23, |
|
"step": 7318 |
|
}, |
|
{ |
|
"epoch": 401.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16408170759677887, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9273, |
|
"eval_samples_per_second": 18.558, |
|
"eval_steps_per_second": 3.206, |
|
"step": 7336 |
|
}, |
|
{ |
|
"epoch": 402.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16408471763134003, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9148, |
|
"eval_samples_per_second": 18.597, |
|
"eval_steps_per_second": 3.212, |
|
"step": 7354 |
|
}, |
|
{ |
|
"epoch": 404.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1640195995569229, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.924, |
|
"eval_samples_per_second": 18.569, |
|
"eval_steps_per_second": 3.207, |
|
"step": 7373 |
|
}, |
|
{ |
|
"epoch": 404.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16400323808193207, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9183, |
|
"eval_samples_per_second": 18.586, |
|
"eval_steps_per_second": 3.21, |
|
"step": 7391 |
|
}, |
|
{ |
|
"epoch": 405.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16385148465633392, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9116, |
|
"eval_samples_per_second": 18.608, |
|
"eval_steps_per_second": 3.214, |
|
"step": 7409 |
|
}, |
|
{ |
|
"epoch": 406.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.163739874958992, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9025, |
|
"eval_samples_per_second": 18.636, |
|
"eval_steps_per_second": 3.219, |
|
"step": 7427 |
|
}, |
|
{ |
|
"epoch": 408.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1632901281118393, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8717, |
|
"eval_samples_per_second": 18.734, |
|
"eval_steps_per_second": 3.236, |
|
"step": 7446 |
|
}, |
|
{ |
|
"epoch": 408.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1632470041513443, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8869, |
|
"eval_samples_per_second": 18.686, |
|
"eval_steps_per_second": 3.228, |
|
"step": 7464 |
|
}, |
|
{ |
|
"epoch": 409.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16338156163692474, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.903, |
|
"eval_samples_per_second": 18.635, |
|
"eval_steps_per_second": 3.219, |
|
"step": 7482 |
|
}, |
|
{ |
|
"epoch": 410.96, |
|
"grad_norm": 0.4017387628555298, |
|
"learning_rate": 3.346666666666667e-06, |
|
"loss": 0.2061, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 410.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16326506435871124, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8869, |
|
"eval_samples_per_second": 18.686, |
|
"eval_steps_per_second": 3.228, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 412.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1629171222448349, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8877, |
|
"eval_samples_per_second": 18.683, |
|
"eval_steps_per_second": 3.227, |
|
"step": 7519 |
|
}, |
|
{ |
|
"epoch": 412.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1628817468881607, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9117, |
|
"eval_samples_per_second": 18.607, |
|
"eval_steps_per_second": 3.214, |
|
"step": 7537 |
|
}, |
|
{ |
|
"epoch": 413.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16285060346126556, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9244, |
|
"eval_samples_per_second": 18.567, |
|
"eval_steps_per_second": 3.207, |
|
"step": 7555 |
|
}, |
|
{ |
|
"epoch": 414.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16274811327457428, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8821, |
|
"eval_samples_per_second": 18.701, |
|
"eval_steps_per_second": 3.23, |
|
"step": 7573 |
|
}, |
|
{ |
|
"epoch": 416.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16286291182041168, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.918, |
|
"eval_samples_per_second": 18.587, |
|
"eval_steps_per_second": 3.211, |
|
"step": 7592 |
|
}, |
|
{ |
|
"epoch": 416.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16281896829605103, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8847, |
|
"eval_samples_per_second": 18.693, |
|
"eval_steps_per_second": 3.229, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 417.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16268208622932434, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9077, |
|
"eval_samples_per_second": 18.62, |
|
"eval_steps_per_second": 3.216, |
|
"step": 7628 |
|
}, |
|
{ |
|
"epoch": 418.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16232354938983917, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9153, |
|
"eval_samples_per_second": 18.596, |
|
"eval_steps_per_second": 3.212, |
|
"step": 7646 |
|
}, |
|
{ |
|
"epoch": 420.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16238705813884735, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9249, |
|
"eval_samples_per_second": 18.566, |
|
"eval_steps_per_second": 3.207, |
|
"step": 7665 |
|
}, |
|
{ |
|
"epoch": 420.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16214394569396973, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8853, |
|
"eval_samples_per_second": 18.691, |
|
"eval_steps_per_second": 3.228, |
|
"step": 7683 |
|
}, |
|
{ |
|
"epoch": 421.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16213367879390717, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.869, |
|
"eval_samples_per_second": 18.743, |
|
"eval_steps_per_second": 3.237, |
|
"step": 7701 |
|
}, |
|
{ |
|
"epoch": 422.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1621711403131485, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.869, |
|
"eval_samples_per_second": 18.743, |
|
"eval_steps_per_second": 3.237, |
|
"step": 7719 |
|
}, |
|
{ |
|
"epoch": 424.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16199961304664612, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8763, |
|
"eval_samples_per_second": 18.719, |
|
"eval_steps_per_second": 3.233, |
|
"step": 7738 |
|
}, |
|
{ |
|
"epoch": 424.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1616300344467163, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9176, |
|
"eval_samples_per_second": 18.589, |
|
"eval_steps_per_second": 3.211, |
|
"step": 7756 |
|
}, |
|
{ |
|
"epoch": 425.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16134707629680634, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9055, |
|
"eval_samples_per_second": 18.627, |
|
"eval_steps_per_second": 3.217, |
|
"step": 7774 |
|
}, |
|
{ |
|
"epoch": 426.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1613784283399582, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8741, |
|
"eval_samples_per_second": 18.726, |
|
"eval_steps_per_second": 3.235, |
|
"step": 7792 |
|
}, |
|
{ |
|
"epoch": 428.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1613391935825348, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.9229, |
|
"eval_samples_per_second": 18.572, |
|
"eval_steps_per_second": 3.208, |
|
"step": 7811 |
|
}, |
|
{ |
|
"epoch": 428.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1613980233669281, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8737, |
|
"eval_samples_per_second": 18.728, |
|
"eval_steps_per_second": 3.235, |
|
"step": 7829 |
|
}, |
|
{ |
|
"epoch": 429.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16145525872707367, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8935, |
|
"eval_samples_per_second": 18.664, |
|
"eval_steps_per_second": 3.224, |
|
"step": 7847 |
|
}, |
|
{ |
|
"epoch": 430.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16126497089862823, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8806, |
|
"eval_samples_per_second": 18.705, |
|
"eval_steps_per_second": 3.231, |
|
"step": 7865 |
|
}, |
|
{ |
|
"epoch": 432.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16113270819187164, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8882, |
|
"eval_samples_per_second": 18.681, |
|
"eval_steps_per_second": 3.227, |
|
"step": 7884 |
|
}, |
|
{ |
|
"epoch": 432.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16119304299354553, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8803, |
|
"eval_samples_per_second": 18.707, |
|
"eval_steps_per_second": 3.231, |
|
"step": 7902 |
|
}, |
|
{ |
|
"epoch": 433.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16113385558128357, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8923, |
|
"eval_samples_per_second": 18.668, |
|
"eval_steps_per_second": 3.225, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 434.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1608574539422989, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8855, |
|
"eval_samples_per_second": 18.69, |
|
"eval_steps_per_second": 3.228, |
|
"step": 7938 |
|
}, |
|
{ |
|
"epoch": 436.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16086578369140625, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8752, |
|
"eval_samples_per_second": 18.723, |
|
"eval_steps_per_second": 3.234, |
|
"step": 7957 |
|
}, |
|
{ |
|
"epoch": 436.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.1609336882829666, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8764, |
|
"eval_samples_per_second": 18.719, |
|
"eval_steps_per_second": 3.233, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 437.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.16086000204086304, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 5.8862, |
|
"eval_samples_per_second": 18.688, |
|
"eval_steps_per_second": 3.228, |
|
"step": 7993 |
|
}, |
|
{ |
|
"epoch": 438.36, |
|
"grad_norm": 0.3427538573741913, |
|
"learning_rate": 2.235555555555556e-06, |
|
"loss": 0.2001, |
|
"step": 8000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 500, |
|
"save_steps": 500, |
|
"total_flos": 2.332923933889659e+17, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|