|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 218.1818181818182, |
|
"eval_steps": 500, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.98, |
|
"eval_gen_len": 14.9909, |
|
"eval_loss": 21.427785873413086, |
|
"eval_rouge1": 0.0931, |
|
"eval_rouge2": 0.021, |
|
"eval_rougeL": 0.0719, |
|
"eval_rougeLsum": 0.0715, |
|
"eval_runtime": 11.4039, |
|
"eval_samples_per_second": 9.646, |
|
"eval_steps_per_second": 1.228, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 14.9909, |
|
"eval_loss": 21.134571075439453, |
|
"eval_rouge1": 0.0948, |
|
"eval_rouge2": 0.0238, |
|
"eval_rougeL": 0.073, |
|
"eval_rougeLsum": 0.0724, |
|
"eval_runtime": 9.5984, |
|
"eval_samples_per_second": 11.46, |
|
"eval_steps_per_second": 1.459, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_gen_len": 14.9909, |
|
"eval_loss": 20.702985763549805, |
|
"eval_rouge1": 0.0917, |
|
"eval_rouge2": 0.0203, |
|
"eval_rougeL": 0.0707, |
|
"eval_rougeLsum": 0.0703, |
|
"eval_runtime": 9.5997, |
|
"eval_samples_per_second": 11.459, |
|
"eval_steps_per_second": 1.458, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 15.3455, |
|
"eval_loss": 20.22304344177246, |
|
"eval_rouge1": 0.0991, |
|
"eval_rouge2": 0.0246, |
|
"eval_rougeL": 0.0781, |
|
"eval_rougeLsum": 0.0778, |
|
"eval_runtime": 9.5876, |
|
"eval_samples_per_second": 11.473, |
|
"eval_steps_per_second": 1.46, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_gen_len": 15.4818, |
|
"eval_loss": 19.888574600219727, |
|
"eval_rouge1": 0.1057, |
|
"eval_rouge2": 0.0297, |
|
"eval_rougeL": 0.0845, |
|
"eval_rougeLsum": 0.0845, |
|
"eval_runtime": 9.549, |
|
"eval_samples_per_second": 11.519, |
|
"eval_steps_per_second": 1.466, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 15.7182, |
|
"eval_loss": 19.539011001586914, |
|
"eval_rouge1": 0.1034, |
|
"eval_rouge2": 0.0276, |
|
"eval_rougeL": 0.084, |
|
"eval_rougeLsum": 0.0839, |
|
"eval_runtime": 9.551, |
|
"eval_samples_per_second": 11.517, |
|
"eval_steps_per_second": 1.466, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_gen_len": 15.8727, |
|
"eval_loss": 19.162174224853516, |
|
"eval_rouge1": 0.1023, |
|
"eval_rouge2": 0.0303, |
|
"eval_rougeL": 0.0837, |
|
"eval_rougeLsum": 0.0835, |
|
"eval_runtime": 9.5511, |
|
"eval_samples_per_second": 11.517, |
|
"eval_steps_per_second": 1.466, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 15.5091, |
|
"eval_loss": 18.730688095092773, |
|
"eval_rouge1": 0.1034, |
|
"eval_rouge2": 0.0342, |
|
"eval_rougeL": 0.0832, |
|
"eval_rougeLsum": 0.083, |
|
"eval_runtime": 9.5901, |
|
"eval_samples_per_second": 11.47, |
|
"eval_steps_per_second": 1.46, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_gen_len": 15.8, |
|
"eval_loss": 18.190088272094727, |
|
"eval_rouge1": 0.0969, |
|
"eval_rouge2": 0.0344, |
|
"eval_rougeL": 0.0818, |
|
"eval_rougeLsum": 0.0815, |
|
"eval_runtime": 9.5799, |
|
"eval_samples_per_second": 11.482, |
|
"eval_steps_per_second": 1.461, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 15.9455, |
|
"eval_loss": 17.473072052001953, |
|
"eval_rouge1": 0.1041, |
|
"eval_rouge2": 0.0337, |
|
"eval_rougeL": 0.0857, |
|
"eval_rougeLsum": 0.0853, |
|
"eval_runtime": 9.5524, |
|
"eval_samples_per_second": 11.515, |
|
"eval_steps_per_second": 1.466, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_gen_len": 18.0182, |
|
"eval_loss": 16.60153579711914, |
|
"eval_rouge1": 0.1001, |
|
"eval_rouge2": 0.029, |
|
"eval_rougeL": 0.0828, |
|
"eval_rougeLsum": 0.0828, |
|
"eval_runtime": 9.5623, |
|
"eval_samples_per_second": 11.503, |
|
"eval_steps_per_second": 1.464, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 16.9636, |
|
"eval_loss": 15.59052848815918, |
|
"eval_rouge1": 0.0875, |
|
"eval_rouge2": 0.0222, |
|
"eval_rougeL": 0.0724, |
|
"eval_rougeLsum": 0.0724, |
|
"eval_runtime": 9.5436, |
|
"eval_samples_per_second": 11.526, |
|
"eval_steps_per_second": 1.467, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"eval_gen_len": 15.1455, |
|
"eval_loss": 14.362702369689941, |
|
"eval_rouge1": 0.0657, |
|
"eval_rouge2": 0.0153, |
|
"eval_rougeL": 0.0545, |
|
"eval_rougeLsum": 0.0543, |
|
"eval_runtime": 9.5681, |
|
"eval_samples_per_second": 11.497, |
|
"eval_steps_per_second": 1.463, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_gen_len": 13.2273, |
|
"eval_loss": 12.995957374572754, |
|
"eval_rouge1": 0.0449, |
|
"eval_rouge2": 0.0083, |
|
"eval_rougeL": 0.0363, |
|
"eval_rougeLsum": 0.0361, |
|
"eval_runtime": 9.506, |
|
"eval_samples_per_second": 11.572, |
|
"eval_steps_per_second": 1.473, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_gen_len": 13.1818, |
|
"eval_loss": 11.857050895690918, |
|
"eval_rouge1": 0.0337, |
|
"eval_rouge2": 0.0071, |
|
"eval_rougeL": 0.0303, |
|
"eval_rougeLsum": 0.0304, |
|
"eval_runtime": 9.5174, |
|
"eval_samples_per_second": 11.558, |
|
"eval_steps_per_second": 1.471, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_gen_len": 12.0545, |
|
"eval_loss": 10.728998184204102, |
|
"eval_rouge1": 0.0137, |
|
"eval_rouge2": 0.002, |
|
"eval_rougeL": 0.0114, |
|
"eval_rougeLsum": 0.0112, |
|
"eval_runtime": 9.5263, |
|
"eval_samples_per_second": 11.547, |
|
"eval_steps_per_second": 1.47, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_gen_len": 14.0818, |
|
"eval_loss": 9.675826072692871, |
|
"eval_rouge1": 0.0051, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.005, |
|
"eval_rougeLsum": 0.0049, |
|
"eval_runtime": 9.4916, |
|
"eval_samples_per_second": 11.589, |
|
"eval_steps_per_second": 1.475, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_gen_len": 17.2545, |
|
"eval_loss": 8.581353187561035, |
|
"eval_rouge1": 0.0013, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0013, |
|
"eval_rougeLsum": 0.0013, |
|
"eval_runtime": 9.4979, |
|
"eval_samples_per_second": 11.581, |
|
"eval_steps_per_second": 1.474, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"grad_norm": 5.896080017089844, |
|
"learning_rate": 1.8360493827160496e-05, |
|
"loss": 16.7039, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_gen_len": 17.5455, |
|
"eval_loss": 7.5404815673828125, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0014, |
|
"eval_rougeLsum": 0.0014, |
|
"eval_runtime": 9.5287, |
|
"eval_samples_per_second": 11.544, |
|
"eval_steps_per_second": 1.469, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_gen_len": 18.5636, |
|
"eval_loss": 6.503509044647217, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.5264, |
|
"eval_samples_per_second": 11.547, |
|
"eval_steps_per_second": 1.47, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 5.559322834014893, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.5196, |
|
"eval_samples_per_second": 11.555, |
|
"eval_steps_per_second": 1.471, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 4.715544700622559, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.5225, |
|
"eval_samples_per_second": 11.552, |
|
"eval_steps_per_second": 1.47, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 22.98, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 4.022518634796143, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.5406, |
|
"eval_samples_per_second": 11.53, |
|
"eval_steps_per_second": 1.467, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.401372194290161, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.5371, |
|
"eval_samples_per_second": 11.534, |
|
"eval_steps_per_second": 1.468, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 24.98, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.971496105194092, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.5401, |
|
"eval_samples_per_second": 11.53, |
|
"eval_steps_per_second": 1.467, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_gen_len": 18.7273, |
|
"eval_loss": 2.6538097858428955, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.5288, |
|
"eval_samples_per_second": 11.544, |
|
"eval_steps_per_second": 1.469, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 26.98, |
|
"eval_gen_len": 14.5, |
|
"eval_loss": 2.4105889797210693, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.478, |
|
"eval_samples_per_second": 11.606, |
|
"eval_steps_per_second": 1.477, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_gen_len": 8.2636, |
|
"eval_loss": 2.2297565937042236, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4449, |
|
"eval_samples_per_second": 11.646, |
|
"eval_steps_per_second": 1.482, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 28.98, |
|
"eval_gen_len": 7.1455, |
|
"eval_loss": 2.1042511463165283, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4539, |
|
"eval_samples_per_second": 11.635, |
|
"eval_steps_per_second": 1.481, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_gen_len": 6.5545, |
|
"eval_loss": 2.011505126953125, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4645, |
|
"eval_samples_per_second": 11.622, |
|
"eval_steps_per_second": 1.479, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 30.98, |
|
"eval_gen_len": 6.0273, |
|
"eval_loss": 1.927019715309143, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5048, |
|
"eval_samples_per_second": 11.573, |
|
"eval_steps_per_second": 1.473, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_gen_len": 5.4091, |
|
"eval_loss": 1.8653310537338257, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.49, |
|
"eval_samples_per_second": 11.591, |
|
"eval_steps_per_second": 1.475, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 32.98, |
|
"eval_gen_len": 5.1727, |
|
"eval_loss": 1.8196451663970947, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4647, |
|
"eval_samples_per_second": 11.622, |
|
"eval_steps_per_second": 1.479, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_gen_len": 5.1909, |
|
"eval_loss": 1.7811836004257202, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4798, |
|
"eval_samples_per_second": 11.604, |
|
"eval_steps_per_second": 1.477, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 34.98, |
|
"eval_gen_len": 5.3182, |
|
"eval_loss": 1.7490955591201782, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4662, |
|
"eval_samples_per_second": 11.62, |
|
"eval_steps_per_second": 1.479, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_gen_len": 5.3091, |
|
"eval_loss": 1.721943974494934, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.4582, |
|
"eval_samples_per_second": 11.63, |
|
"eval_steps_per_second": 1.48, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"grad_norm": 3.1814663410186768, |
|
"learning_rate": 1.6720987654320987e-05, |
|
"loss": 3.9957, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 36.98, |
|
"eval_gen_len": 5.3273, |
|
"eval_loss": 1.69921875, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.5006, |
|
"eval_samples_per_second": 11.578, |
|
"eval_steps_per_second": 1.474, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_gen_len": 5.2091, |
|
"eval_loss": 1.678276777267456, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.4859, |
|
"eval_samples_per_second": 11.596, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 38.98, |
|
"eval_gen_len": 5.9273, |
|
"eval_loss": 1.657922625541687, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.4757, |
|
"eval_samples_per_second": 11.609, |
|
"eval_steps_per_second": 1.477, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_gen_len": 6.1909, |
|
"eval_loss": 1.6388959884643555, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.4468, |
|
"eval_samples_per_second": 11.644, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"eval_gen_len": 5.6818, |
|
"eval_loss": 1.622721552848816, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4505, |
|
"eval_samples_per_second": 11.64, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_gen_len": 5.0818, |
|
"eval_loss": 1.6065285205841064, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4897, |
|
"eval_samples_per_second": 11.592, |
|
"eval_steps_per_second": 1.475, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 42.98, |
|
"eval_gen_len": 5.5091, |
|
"eval_loss": 1.5892502069473267, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.4662, |
|
"eval_samples_per_second": 11.62, |
|
"eval_steps_per_second": 1.479, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_gen_len": 6.0636, |
|
"eval_loss": 1.568816900253296, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4716, |
|
"eval_samples_per_second": 11.614, |
|
"eval_steps_per_second": 1.478, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 44.98, |
|
"eval_gen_len": 6.6273, |
|
"eval_loss": 1.5522329807281494, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4809, |
|
"eval_samples_per_second": 11.602, |
|
"eval_steps_per_second": 1.477, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_gen_len": 6.5364, |
|
"eval_loss": 1.539686918258667, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4493, |
|
"eval_samples_per_second": 11.641, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 46.98, |
|
"eval_gen_len": 6.3909, |
|
"eval_loss": 1.527640700340271, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.441, |
|
"eval_samples_per_second": 11.651, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_gen_len": 7.2818, |
|
"eval_loss": 1.5062768459320068, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 9.4248, |
|
"eval_samples_per_second": 11.671, |
|
"eval_steps_per_second": 1.485, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 48.98, |
|
"eval_gen_len": 7.9273, |
|
"eval_loss": 1.4878034591674805, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.5189, |
|
"eval_samples_per_second": 11.556, |
|
"eval_steps_per_second": 1.471, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_gen_len": 7.5636, |
|
"eval_loss": 1.4774630069732666, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.4679, |
|
"eval_samples_per_second": 11.618, |
|
"eval_steps_per_second": 1.479, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 50.98, |
|
"eval_gen_len": 8.2273, |
|
"eval_loss": 1.4622873067855835, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 9.4582, |
|
"eval_samples_per_second": 11.63, |
|
"eval_steps_per_second": 1.48, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_gen_len": 8.5636, |
|
"eval_loss": 1.4518091678619385, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 9.4604, |
|
"eval_samples_per_second": 11.627, |
|
"eval_steps_per_second": 1.48, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 52.98, |
|
"eval_gen_len": 8.2545, |
|
"eval_loss": 1.4443649053573608, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 9.4485, |
|
"eval_samples_per_second": 11.642, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_gen_len": 7.9545, |
|
"eval_loss": 1.4318201541900635, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.5193, |
|
"eval_samples_per_second": 11.555, |
|
"eval_steps_per_second": 1.471, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 54.55, |
|
"grad_norm": 4.463714599609375, |
|
"learning_rate": 1.5078189300411523e-05, |
|
"loss": 1.9182, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 54.98, |
|
"eval_gen_len": 8.8273, |
|
"eval_loss": 1.4121521711349487, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0009, |
|
"eval_rougeLsum": 0.0009, |
|
"eval_runtime": 9.4998, |
|
"eval_samples_per_second": 11.579, |
|
"eval_steps_per_second": 1.474, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_gen_len": 7.8455, |
|
"eval_loss": 1.400160789489746, |
|
"eval_rouge1": 0.0012, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0013, |
|
"eval_rougeLsum": 0.0012, |
|
"eval_runtime": 9.4886, |
|
"eval_samples_per_second": 11.593, |
|
"eval_steps_per_second": 1.475, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 56.98, |
|
"eval_gen_len": 7.7909, |
|
"eval_loss": 1.3821996450424194, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 9.4712, |
|
"eval_samples_per_second": 11.614, |
|
"eval_steps_per_second": 1.478, |
|
"step": 1567 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_gen_len": 8.5545, |
|
"eval_loss": 1.370047688484192, |
|
"eval_rouge1": 0.001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.001, |
|
"eval_rougeLsum": 0.001, |
|
"eval_runtime": 9.4659, |
|
"eval_samples_per_second": 11.621, |
|
"eval_steps_per_second": 1.479, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 58.98, |
|
"eval_gen_len": 8.4636, |
|
"eval_loss": 1.3583662509918213, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 9.472, |
|
"eval_samples_per_second": 11.613, |
|
"eval_steps_per_second": 1.478, |
|
"step": 1622 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_gen_len": 8.0909, |
|
"eval_loss": 1.3374855518341064, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.4676, |
|
"eval_samples_per_second": 11.619, |
|
"eval_steps_per_second": 1.479, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 60.98, |
|
"eval_gen_len": 8.7, |
|
"eval_loss": 1.3220137357711792, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0016, |
|
"eval_rougeLsum": 0.0016, |
|
"eval_runtime": 9.504, |
|
"eval_samples_per_second": 11.574, |
|
"eval_steps_per_second": 1.473, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_gen_len": 8.5818, |
|
"eval_loss": 1.3142321109771729, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 9.4996, |
|
"eval_samples_per_second": 11.579, |
|
"eval_steps_per_second": 1.474, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 62.98, |
|
"eval_gen_len": 8.7727, |
|
"eval_loss": 1.3096009492874146, |
|
"eval_rouge1": 0.0016, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0017, |
|
"eval_rougeLsum": 0.0017, |
|
"eval_runtime": 9.463, |
|
"eval_samples_per_second": 11.624, |
|
"eval_steps_per_second": 1.479, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_gen_len": 9.0455, |
|
"eval_loss": 1.2836058139801025, |
|
"eval_rouge1": 0.0013, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0012, |
|
"eval_rougeLsum": 0.0013, |
|
"eval_runtime": 9.4725, |
|
"eval_samples_per_second": 11.613, |
|
"eval_steps_per_second": 1.478, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 64.98, |
|
"eval_gen_len": 9.0, |
|
"eval_loss": 1.2642889022827148, |
|
"eval_rouge1": 0.002, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.002, |
|
"eval_rougeLsum": 0.002, |
|
"eval_runtime": 9.6032, |
|
"eval_samples_per_second": 11.454, |
|
"eval_steps_per_second": 1.458, |
|
"step": 1787 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_gen_len": 8.8182, |
|
"eval_loss": 1.2471901178359985, |
|
"eval_rouge1": 0.0052, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0052, |
|
"eval_rougeLsum": 0.0052, |
|
"eval_runtime": 9.4575, |
|
"eval_samples_per_second": 11.631, |
|
"eval_steps_per_second": 1.48, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"eval_gen_len": 9.6636, |
|
"eval_loss": 1.226246953010559, |
|
"eval_rouge1": 0.0086, |
|
"eval_rouge2": 0.0013, |
|
"eval_rougeL": 0.0088, |
|
"eval_rougeLsum": 0.0088, |
|
"eval_runtime": 9.4953, |
|
"eval_samples_per_second": 11.585, |
|
"eval_steps_per_second": 1.474, |
|
"step": 1842 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_gen_len": 9.5273, |
|
"eval_loss": 1.2092421054840088, |
|
"eval_rouge1": 0.0107, |
|
"eval_rouge2": 0.0019, |
|
"eval_rougeL": 0.009, |
|
"eval_rougeLsum": 0.0088, |
|
"eval_runtime": 9.5273, |
|
"eval_samples_per_second": 11.546, |
|
"eval_steps_per_second": 1.469, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 68.98, |
|
"eval_gen_len": 9.8091, |
|
"eval_loss": 1.1953155994415283, |
|
"eval_rouge1": 0.0123, |
|
"eval_rouge2": 0.0032, |
|
"eval_rougeL": 0.012, |
|
"eval_rougeLsum": 0.0118, |
|
"eval_runtime": 9.4864, |
|
"eval_samples_per_second": 11.596, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1897 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_gen_len": 9.3636, |
|
"eval_loss": 1.1815446615219116, |
|
"eval_rouge1": 0.0104, |
|
"eval_rouge2": 0.0033, |
|
"eval_rougeL": 0.0101, |
|
"eval_rougeLsum": 0.0098, |
|
"eval_runtime": 9.4807, |
|
"eval_samples_per_second": 11.603, |
|
"eval_steps_per_second": 1.477, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 70.98, |
|
"eval_gen_len": 9.1091, |
|
"eval_loss": 1.161879301071167, |
|
"eval_rouge1": 0.0064, |
|
"eval_rouge2": 0.0008, |
|
"eval_rougeL": 0.0056, |
|
"eval_rougeLsum": 0.0055, |
|
"eval_runtime": 9.4717, |
|
"eval_samples_per_second": 11.613, |
|
"eval_steps_per_second": 1.478, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_gen_len": 9.3273, |
|
"eval_loss": 1.139123797416687, |
|
"eval_rouge1": 0.0105, |
|
"eval_rouge2": 0.002, |
|
"eval_rougeL": 0.0099, |
|
"eval_rougeLsum": 0.0098, |
|
"eval_runtime": 9.4501, |
|
"eval_samples_per_second": 11.64, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"grad_norm": 3.556974172592163, |
|
"learning_rate": 1.34320987654321e-05, |
|
"loss": 1.6026, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 72.98, |
|
"eval_gen_len": 9.7364, |
|
"eval_loss": 1.1243102550506592, |
|
"eval_rouge1": 0.0108, |
|
"eval_rouge2": 0.0019, |
|
"eval_rougeL": 0.0104, |
|
"eval_rougeLsum": 0.0101, |
|
"eval_runtime": 9.4726, |
|
"eval_samples_per_second": 11.612, |
|
"eval_steps_per_second": 1.478, |
|
"step": 2007 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_gen_len": 10.6182, |
|
"eval_loss": 1.1061749458312988, |
|
"eval_rouge1": 0.0204, |
|
"eval_rouge2": 0.0031, |
|
"eval_rougeL": 0.0185, |
|
"eval_rougeLsum": 0.0183, |
|
"eval_runtime": 9.4719, |
|
"eval_samples_per_second": 11.613, |
|
"eval_steps_per_second": 1.478, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 74.98, |
|
"eval_gen_len": 10.1545, |
|
"eval_loss": 1.0930193662643433, |
|
"eval_rouge1": 0.0159, |
|
"eval_rouge2": 0.0017, |
|
"eval_rougeL": 0.0145, |
|
"eval_rougeLsum": 0.0143, |
|
"eval_runtime": 9.4961, |
|
"eval_samples_per_second": 11.584, |
|
"eval_steps_per_second": 1.474, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_gen_len": 10.7364, |
|
"eval_loss": 1.077184796333313, |
|
"eval_rouge1": 0.0151, |
|
"eval_rouge2": 0.0023, |
|
"eval_rougeL": 0.0133, |
|
"eval_rougeLsum": 0.0133, |
|
"eval_runtime": 9.5109, |
|
"eval_samples_per_second": 11.566, |
|
"eval_steps_per_second": 1.472, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 76.98, |
|
"eval_gen_len": 10.7, |
|
"eval_loss": 1.0578068494796753, |
|
"eval_rouge1": 0.0195, |
|
"eval_rouge2": 0.0044, |
|
"eval_rougeL": 0.0178, |
|
"eval_rougeLsum": 0.0178, |
|
"eval_runtime": 9.4786, |
|
"eval_samples_per_second": 11.605, |
|
"eval_steps_per_second": 1.477, |
|
"step": 2117 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_gen_len": 11.0636, |
|
"eval_loss": 1.0393445491790771, |
|
"eval_rouge1": 0.0237, |
|
"eval_rouge2": 0.0048, |
|
"eval_rougeL": 0.0214, |
|
"eval_rougeLsum": 0.0211, |
|
"eval_runtime": 9.4415, |
|
"eval_samples_per_second": 11.651, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 78.98, |
|
"eval_gen_len": 10.1455, |
|
"eval_loss": 1.0262919664382935, |
|
"eval_rouge1": 0.0121, |
|
"eval_rouge2": 0.0014, |
|
"eval_rougeL": 0.0113, |
|
"eval_rougeLsum": 0.0112, |
|
"eval_runtime": 9.4285, |
|
"eval_samples_per_second": 11.667, |
|
"eval_steps_per_second": 1.485, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_gen_len": 11.3818, |
|
"eval_loss": 1.0064616203308105, |
|
"eval_rouge1": 0.0273, |
|
"eval_rouge2": 0.0048, |
|
"eval_rougeL": 0.0238, |
|
"eval_rougeLsum": 0.0235, |
|
"eval_runtime": 9.448, |
|
"eval_samples_per_second": 11.643, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 80.98, |
|
"eval_gen_len": 10.9545, |
|
"eval_loss": 0.98997563123703, |
|
"eval_rouge1": 0.0228, |
|
"eval_rouge2": 0.0042, |
|
"eval_rougeL": 0.0197, |
|
"eval_rougeLsum": 0.0196, |
|
"eval_runtime": 9.4798, |
|
"eval_samples_per_second": 11.604, |
|
"eval_steps_per_second": 1.477, |
|
"step": 2227 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_gen_len": 10.9909, |
|
"eval_loss": 0.9675103425979614, |
|
"eval_rouge1": 0.024, |
|
"eval_rouge2": 0.0046, |
|
"eval_rougeL": 0.0204, |
|
"eval_rougeLsum": 0.0202, |
|
"eval_runtime": 9.4448, |
|
"eval_samples_per_second": 11.647, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 82.98, |
|
"eval_gen_len": 10.7182, |
|
"eval_loss": 0.9506540298461914, |
|
"eval_rouge1": 0.0244, |
|
"eval_rouge2": 0.0051, |
|
"eval_rougeL": 0.0211, |
|
"eval_rougeLsum": 0.0211, |
|
"eval_runtime": 9.5012, |
|
"eval_samples_per_second": 11.578, |
|
"eval_steps_per_second": 1.474, |
|
"step": 2282 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_gen_len": 10.8636, |
|
"eval_loss": 0.9340613484382629, |
|
"eval_rouge1": 0.0249, |
|
"eval_rouge2": 0.0058, |
|
"eval_rougeL": 0.022, |
|
"eval_rougeLsum": 0.0223, |
|
"eval_runtime": 9.447, |
|
"eval_samples_per_second": 11.644, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 84.98, |
|
"eval_gen_len": 10.0909, |
|
"eval_loss": 0.9161014556884766, |
|
"eval_rouge1": 0.0243, |
|
"eval_rouge2": 0.0077, |
|
"eval_rougeL": 0.0224, |
|
"eval_rougeLsum": 0.0226, |
|
"eval_runtime": 9.4492, |
|
"eval_samples_per_second": 11.641, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2337 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_gen_len": 9.7182, |
|
"eval_loss": 0.8942736983299255, |
|
"eval_rouge1": 0.0176, |
|
"eval_rouge2": 0.0035, |
|
"eval_rougeL": 0.0152, |
|
"eval_rougeLsum": 0.0153, |
|
"eval_runtime": 9.4727, |
|
"eval_samples_per_second": 11.612, |
|
"eval_steps_per_second": 1.478, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 86.98, |
|
"eval_gen_len": 10.0, |
|
"eval_loss": 0.8758471608161926, |
|
"eval_rouge1": 0.0239, |
|
"eval_rouge2": 0.0093, |
|
"eval_rougeL": 0.0214, |
|
"eval_rougeLsum": 0.0215, |
|
"eval_runtime": 9.4864, |
|
"eval_samples_per_second": 11.596, |
|
"eval_steps_per_second": 1.476, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_gen_len": 10.2273, |
|
"eval_loss": 0.854742169380188, |
|
"eval_rouge1": 0.0254, |
|
"eval_rouge2": 0.0116, |
|
"eval_rougeL": 0.0237, |
|
"eval_rougeLsum": 0.0238, |
|
"eval_runtime": 9.5043, |
|
"eval_samples_per_second": 11.574, |
|
"eval_steps_per_second": 1.473, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 88.98, |
|
"eval_gen_len": 10.2545, |
|
"eval_loss": 0.8352662324905396, |
|
"eval_rouge1": 0.0196, |
|
"eval_rouge2": 0.007, |
|
"eval_rougeL": 0.0183, |
|
"eval_rougeLsum": 0.0182, |
|
"eval_runtime": 9.455, |
|
"eval_samples_per_second": 11.634, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2447 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_gen_len": 9.2364, |
|
"eval_loss": 0.8150736093521118, |
|
"eval_rouge1": 0.0104, |
|
"eval_rouge2": 0.0032, |
|
"eval_rougeL": 0.0095, |
|
"eval_rougeLsum": 0.0098, |
|
"eval_runtime": 9.4416, |
|
"eval_samples_per_second": 11.651, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"grad_norm": 1.338108777999878, |
|
"learning_rate": 1.1786008230452676e-05, |
|
"loss": 1.2934, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 90.98, |
|
"eval_gen_len": 9.3455, |
|
"eval_loss": 0.7920636534690857, |
|
"eval_rouge1": 0.01, |
|
"eval_rouge2": 0.0036, |
|
"eval_rougeL": 0.0095, |
|
"eval_rougeLsum": 0.0096, |
|
"eval_runtime": 9.4783, |
|
"eval_samples_per_second": 11.605, |
|
"eval_steps_per_second": 1.477, |
|
"step": 2502 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_gen_len": 9.2545, |
|
"eval_loss": 0.7697137594223022, |
|
"eval_rouge1": 0.012, |
|
"eval_rouge2": 0.0051, |
|
"eval_rougeL": 0.0111, |
|
"eval_rougeLsum": 0.011, |
|
"eval_runtime": 9.5826, |
|
"eval_samples_per_second": 11.479, |
|
"eval_steps_per_second": 1.461, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 92.98, |
|
"eval_gen_len": 8.8455, |
|
"eval_loss": 0.7492441534996033, |
|
"eval_rouge1": 0.0106, |
|
"eval_rouge2": 0.0048, |
|
"eval_rougeL": 0.0098, |
|
"eval_rougeLsum": 0.0098, |
|
"eval_runtime": 9.4826, |
|
"eval_samples_per_second": 11.6, |
|
"eval_steps_per_second": 1.476, |
|
"step": 2557 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_gen_len": 9.4, |
|
"eval_loss": 0.7301111221313477, |
|
"eval_rouge1": 0.0112, |
|
"eval_rouge2": 0.0053, |
|
"eval_rougeL": 0.0095, |
|
"eval_rougeLsum": 0.0095, |
|
"eval_runtime": 9.4682, |
|
"eval_samples_per_second": 11.618, |
|
"eval_steps_per_second": 1.479, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 94.98, |
|
"eval_gen_len": 8.9, |
|
"eval_loss": 0.7125746607780457, |
|
"eval_rouge1": 0.0069, |
|
"eval_rouge2": 0.0026, |
|
"eval_rougeL": 0.0063, |
|
"eval_rougeLsum": 0.0064, |
|
"eval_runtime": 9.4699, |
|
"eval_samples_per_second": 11.616, |
|
"eval_steps_per_second": 1.478, |
|
"step": 2612 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_gen_len": 9.2, |
|
"eval_loss": 0.6931119561195374, |
|
"eval_rouge1": 0.0107, |
|
"eval_rouge2": 0.0053, |
|
"eval_rougeL": 0.0088, |
|
"eval_rougeLsum": 0.0089, |
|
"eval_runtime": 9.4599, |
|
"eval_samples_per_second": 11.628, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 96.98, |
|
"eval_gen_len": 8.8909, |
|
"eval_loss": 0.6750566363334656, |
|
"eval_rouge1": 0.0063, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0049, |
|
"eval_rougeLsum": 0.0048, |
|
"eval_runtime": 9.4656, |
|
"eval_samples_per_second": 11.621, |
|
"eval_steps_per_second": 1.479, |
|
"step": 2667 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_gen_len": 9.3182, |
|
"eval_loss": 0.6581041216850281, |
|
"eval_rouge1": 0.0091, |
|
"eval_rouge2": 0.005, |
|
"eval_rougeL": 0.0069, |
|
"eval_rougeLsum": 0.0071, |
|
"eval_runtime": 9.4802, |
|
"eval_samples_per_second": 11.603, |
|
"eval_steps_per_second": 1.477, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 98.98, |
|
"eval_gen_len": 9.1091, |
|
"eval_loss": 0.6413628458976746, |
|
"eval_rouge1": 0.0038, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0026, |
|
"eval_rougeLsum": 0.0034, |
|
"eval_runtime": 9.4864, |
|
"eval_samples_per_second": 11.595, |
|
"eval_steps_per_second": 1.476, |
|
"step": 2722 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_gen_len": 9.5273, |
|
"eval_loss": 0.6237995028495789, |
|
"eval_rouge1": 0.0125, |
|
"eval_rouge2": 0.0088, |
|
"eval_rougeL": 0.0109, |
|
"eval_rougeLsum": 0.0109, |
|
"eval_runtime": 9.479, |
|
"eval_samples_per_second": 11.605, |
|
"eval_steps_per_second": 1.477, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 100.98, |
|
"eval_gen_len": 9.3727, |
|
"eval_loss": 0.6053850054740906, |
|
"eval_rouge1": 0.0053, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0042, |
|
"eval_rougeLsum": 0.0045, |
|
"eval_runtime": 9.4604, |
|
"eval_samples_per_second": 11.627, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2777 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_gen_len": 9.7091, |
|
"eval_loss": 0.5907317996025085, |
|
"eval_rouge1": 0.0115, |
|
"eval_rouge2": 0.0096, |
|
"eval_rougeL": 0.0108, |
|
"eval_rougeLsum": 0.0109, |
|
"eval_runtime": 9.4533, |
|
"eval_samples_per_second": 11.636, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 102.98, |
|
"eval_gen_len": 9.5364, |
|
"eval_loss": 0.5752558708190918, |
|
"eval_rouge1": 0.0055, |
|
"eval_rouge2": 0.0045, |
|
"eval_rougeL": 0.005, |
|
"eval_rougeLsum": 0.0053, |
|
"eval_runtime": 9.4454, |
|
"eval_samples_per_second": 11.646, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2832 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_gen_len": 9.7273, |
|
"eval_loss": 0.562364399433136, |
|
"eval_rouge1": 0.01, |
|
"eval_rouge2": 0.0057, |
|
"eval_rougeL": 0.0091, |
|
"eval_rougeLsum": 0.0094, |
|
"eval_runtime": 9.4478, |
|
"eval_samples_per_second": 11.643, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 104.98, |
|
"eval_gen_len": 9.6909, |
|
"eval_loss": 0.5496523380279541, |
|
"eval_rouge1": 0.0078, |
|
"eval_rouge2": 0.0038, |
|
"eval_rougeL": 0.0066, |
|
"eval_rougeLsum": 0.0069, |
|
"eval_runtime": 9.4963, |
|
"eval_samples_per_second": 11.583, |
|
"eval_steps_per_second": 1.474, |
|
"step": 2887 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_gen_len": 9.6, |
|
"eval_loss": 0.5380507111549377, |
|
"eval_rouge1": 0.0077, |
|
"eval_rouge2": 0.0041, |
|
"eval_rougeL": 0.0068, |
|
"eval_rougeLsum": 0.0071, |
|
"eval_runtime": 9.4581, |
|
"eval_samples_per_second": 11.63, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 106.98, |
|
"eval_gen_len": 9.2909, |
|
"eval_loss": 0.5269507765769958, |
|
"eval_rouge1": 0.0109, |
|
"eval_rouge2": 0.0068, |
|
"eval_rougeL": 0.0101, |
|
"eval_rougeLsum": 0.0103, |
|
"eval_runtime": 9.4431, |
|
"eval_samples_per_second": 11.649, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2942 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_gen_len": 8.9636, |
|
"eval_loss": 0.5166643857955933, |
|
"eval_rouge1": 0.0095, |
|
"eval_rouge2": 0.004, |
|
"eval_rougeL": 0.008, |
|
"eval_rougeLsum": 0.0079, |
|
"eval_runtime": 9.4573, |
|
"eval_samples_per_second": 11.631, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 108.98, |
|
"eval_gen_len": 9.3818, |
|
"eval_loss": 0.5079358816146851, |
|
"eval_rouge1": 0.0078, |
|
"eval_rouge2": 0.0035, |
|
"eval_rougeL": 0.0055, |
|
"eval_rougeLsum": 0.0059, |
|
"eval_runtime": 9.4478, |
|
"eval_samples_per_second": 11.643, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2997 |
|
}, |
|
{ |
|
"epoch": 109.09, |
|
"grad_norm": 2.720813512802124, |
|
"learning_rate": 1.0139917695473251e-05, |
|
"loss": 0.9194, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_gen_len": 9.2909, |
|
"eval_loss": 0.5007773041725159, |
|
"eval_rouge1": 0.0044, |
|
"eval_rouge2": 0.0019, |
|
"eval_rougeL": 0.0037, |
|
"eval_rougeLsum": 0.0033, |
|
"eval_runtime": 9.4749, |
|
"eval_samples_per_second": 11.61, |
|
"eval_steps_per_second": 1.478, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 110.98, |
|
"eval_gen_len": 9.2909, |
|
"eval_loss": 0.49183493852615356, |
|
"eval_rouge1": 0.0108, |
|
"eval_rouge2": 0.0069, |
|
"eval_rougeL": 0.0087, |
|
"eval_rougeLsum": 0.0088, |
|
"eval_runtime": 9.5004, |
|
"eval_samples_per_second": 11.578, |
|
"eval_steps_per_second": 1.474, |
|
"step": 3052 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_gen_len": 9.2455, |
|
"eval_loss": 0.48342087864875793, |
|
"eval_rouge1": 0.0112, |
|
"eval_rouge2": 0.008, |
|
"eval_rougeL": 0.0092, |
|
"eval_rougeLsum": 0.009, |
|
"eval_runtime": 9.5074, |
|
"eval_samples_per_second": 11.57, |
|
"eval_steps_per_second": 1.473, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 112.98, |
|
"eval_gen_len": 8.9364, |
|
"eval_loss": 0.47708794474601746, |
|
"eval_rouge1": 0.0057, |
|
"eval_rouge2": 0.0031, |
|
"eval_rougeL": 0.0042, |
|
"eval_rougeLsum": 0.004, |
|
"eval_runtime": 9.5044, |
|
"eval_samples_per_second": 11.574, |
|
"eval_steps_per_second": 1.473, |
|
"step": 3107 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_gen_len": 9.1364, |
|
"eval_loss": 0.47102922201156616, |
|
"eval_rouge1": 0.0122, |
|
"eval_rouge2": 0.0065, |
|
"eval_rougeL": 0.0089, |
|
"eval_rougeLsum": 0.0087, |
|
"eval_runtime": 9.4603, |
|
"eval_samples_per_second": 11.628, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 114.98, |
|
"eval_gen_len": 9.2182, |
|
"eval_loss": 0.4654105007648468, |
|
"eval_rouge1": 0.0102, |
|
"eval_rouge2": 0.0063, |
|
"eval_rougeL": 0.0085, |
|
"eval_rougeLsum": 0.0083, |
|
"eval_runtime": 9.4614, |
|
"eval_samples_per_second": 11.626, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_gen_len": 8.9455, |
|
"eval_loss": 0.4600731432437897, |
|
"eval_rouge1": 0.0119, |
|
"eval_rouge2": 0.0092, |
|
"eval_rougeL": 0.0104, |
|
"eval_rougeLsum": 0.0102, |
|
"eval_runtime": 9.461, |
|
"eval_samples_per_second": 11.627, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 116.98, |
|
"eval_gen_len": 8.4182, |
|
"eval_loss": 0.4526377320289612, |
|
"eval_rouge1": 0.0107, |
|
"eval_rouge2": 0.0082, |
|
"eval_rougeL": 0.0099, |
|
"eval_rougeLsum": 0.0097, |
|
"eval_runtime": 9.4731, |
|
"eval_samples_per_second": 11.612, |
|
"eval_steps_per_second": 1.478, |
|
"step": 3217 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_gen_len": 8.4273, |
|
"eval_loss": 0.4467811584472656, |
|
"eval_rouge1": 0.0052, |
|
"eval_rouge2": 0.0044, |
|
"eval_rougeL": 0.0045, |
|
"eval_rougeLsum": 0.0045, |
|
"eval_runtime": 9.4604, |
|
"eval_samples_per_second": 11.627, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 118.98, |
|
"eval_gen_len": 8.3, |
|
"eval_loss": 0.4426099956035614, |
|
"eval_rouge1": 0.0054, |
|
"eval_rouge2": 0.0041, |
|
"eval_rougeL": 0.005, |
|
"eval_rougeLsum": 0.0052, |
|
"eval_runtime": 9.4891, |
|
"eval_samples_per_second": 11.592, |
|
"eval_steps_per_second": 1.475, |
|
"step": 3272 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_gen_len": 8.2727, |
|
"eval_loss": 0.43666210770606995, |
|
"eval_rouge1": 0.0107, |
|
"eval_rouge2": 0.0079, |
|
"eval_rougeL": 0.0102, |
|
"eval_rougeLsum": 0.0101, |
|
"eval_runtime": 9.5096, |
|
"eval_samples_per_second": 11.567, |
|
"eval_steps_per_second": 1.472, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 120.98, |
|
"eval_gen_len": 8.7182, |
|
"eval_loss": 0.4338292181491852, |
|
"eval_rouge1": 0.0142, |
|
"eval_rouge2": 0.0102, |
|
"eval_rougeL": 0.0134, |
|
"eval_rougeLsum": 0.0131, |
|
"eval_runtime": 9.4732, |
|
"eval_samples_per_second": 11.612, |
|
"eval_steps_per_second": 1.478, |
|
"step": 3327 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_gen_len": 7.5727, |
|
"eval_loss": 0.4293038547039032, |
|
"eval_rouge1": 0.0045, |
|
"eval_rouge2": 0.0035, |
|
"eval_rougeL": 0.0039, |
|
"eval_rougeLsum": 0.0039, |
|
"eval_runtime": 9.458, |
|
"eval_samples_per_second": 11.63, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 122.98, |
|
"eval_gen_len": 7.8818, |
|
"eval_loss": 0.4247772991657257, |
|
"eval_rouge1": 0.0082, |
|
"eval_rouge2": 0.0056, |
|
"eval_rougeL": 0.0078, |
|
"eval_rougeLsum": 0.0076, |
|
"eval_runtime": 9.4976, |
|
"eval_samples_per_second": 11.582, |
|
"eval_steps_per_second": 1.474, |
|
"step": 3382 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_gen_len": 7.4273, |
|
"eval_loss": 0.4226304888725281, |
|
"eval_rouge1": 0.0047, |
|
"eval_rouge2": 0.0039, |
|
"eval_rougeL": 0.0047, |
|
"eval_rougeLsum": 0.0047, |
|
"eval_runtime": 9.4775, |
|
"eval_samples_per_second": 11.606, |
|
"eval_steps_per_second": 1.477, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 124.98, |
|
"eval_gen_len": 7.7091, |
|
"eval_loss": 0.4187226891517639, |
|
"eval_rouge1": 0.0096, |
|
"eval_rouge2": 0.0065, |
|
"eval_rougeL": 0.0097, |
|
"eval_rougeLsum": 0.0095, |
|
"eval_runtime": 9.4596, |
|
"eval_samples_per_second": 11.628, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3437 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_gen_len": 7.1364, |
|
"eval_loss": 0.4152156412601471, |
|
"eval_rouge1": 0.0026, |
|
"eval_rouge2": 0.0024, |
|
"eval_rougeL": 0.0026, |
|
"eval_rougeLsum": 0.0026, |
|
"eval_runtime": 9.4787, |
|
"eval_samples_per_second": 11.605, |
|
"eval_steps_per_second": 1.477, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 126.98, |
|
"eval_gen_len": 6.8909, |
|
"eval_loss": 0.4114760458469391, |
|
"eval_rouge1": 0.0026, |
|
"eval_rouge2": 0.0024, |
|
"eval_rougeL": 0.0026, |
|
"eval_rougeLsum": 0.0026, |
|
"eval_runtime": 9.4927, |
|
"eval_samples_per_second": 11.588, |
|
"eval_steps_per_second": 1.475, |
|
"step": 3492 |
|
}, |
|
{ |
|
"epoch": 127.27, |
|
"grad_norm": 5.880601406097412, |
|
"learning_rate": 8.493827160493828e-06, |
|
"loss": 0.6369, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_gen_len": 6.7182, |
|
"eval_loss": 0.4087616503238678, |
|
"eval_rouge1": 0.0051, |
|
"eval_rouge2": 0.0048, |
|
"eval_rougeL": 0.0051, |
|
"eval_rougeLsum": 0.0051, |
|
"eval_runtime": 9.5116, |
|
"eval_samples_per_second": 11.565, |
|
"eval_steps_per_second": 1.472, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 128.98, |
|
"eval_gen_len": 7.3091, |
|
"eval_loss": 0.4050390422344208, |
|
"eval_rouge1": 0.0113, |
|
"eval_rouge2": 0.0097, |
|
"eval_rougeL": 0.0115, |
|
"eval_rougeLsum": 0.0115, |
|
"eval_runtime": 9.5251, |
|
"eval_samples_per_second": 11.548, |
|
"eval_steps_per_second": 1.47, |
|
"step": 3547 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_gen_len": 7.2727, |
|
"eval_loss": 0.40342459082603455, |
|
"eval_rouge1": 0.0097, |
|
"eval_rouge2": 0.0086, |
|
"eval_rougeL": 0.0098, |
|
"eval_rougeLsum": 0.0099, |
|
"eval_runtime": 9.5061, |
|
"eval_samples_per_second": 11.572, |
|
"eval_steps_per_second": 1.473, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 130.98, |
|
"eval_gen_len": 6.9455, |
|
"eval_loss": 0.39917439222335815, |
|
"eval_rouge1": 0.0096, |
|
"eval_rouge2": 0.0081, |
|
"eval_rougeL": 0.0097, |
|
"eval_rougeLsum": 0.0097, |
|
"eval_runtime": 9.5045, |
|
"eval_samples_per_second": 11.573, |
|
"eval_steps_per_second": 1.473, |
|
"step": 3602 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_gen_len": 6.6, |
|
"eval_loss": 0.3954027593135834, |
|
"eval_rouge1": 0.0053, |
|
"eval_rouge2": 0.0056, |
|
"eval_rougeL": 0.0053, |
|
"eval_rougeLsum": 0.0061, |
|
"eval_runtime": 9.4882, |
|
"eval_samples_per_second": 11.593, |
|
"eval_steps_per_second": 1.476, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 132.98, |
|
"eval_gen_len": 6.6727, |
|
"eval_loss": 0.3916667103767395, |
|
"eval_rouge1": 0.0061, |
|
"eval_rouge2": 0.0056, |
|
"eval_rougeL": 0.006, |
|
"eval_rougeLsum": 0.0061, |
|
"eval_runtime": 9.4921, |
|
"eval_samples_per_second": 11.589, |
|
"eval_steps_per_second": 1.475, |
|
"step": 3657 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_gen_len": 6.9455, |
|
"eval_loss": 0.3922131359577179, |
|
"eval_rouge1": 0.0084, |
|
"eval_rouge2": 0.0072, |
|
"eval_rougeL": 0.0082, |
|
"eval_rougeLsum": 0.0086, |
|
"eval_runtime": 9.4995, |
|
"eval_samples_per_second": 11.58, |
|
"eval_steps_per_second": 1.474, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 134.98, |
|
"eval_gen_len": 6.3727, |
|
"eval_loss": 0.38674217462539673, |
|
"eval_rouge1": 0.0052, |
|
"eval_rouge2": 0.003, |
|
"eval_rougeL": 0.0046, |
|
"eval_rougeLsum": 0.0046, |
|
"eval_runtime": 9.4561, |
|
"eval_samples_per_second": 11.633, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_gen_len": 6.6273, |
|
"eval_loss": 0.38475027680397034, |
|
"eval_rouge1": 0.009, |
|
"eval_rouge2": 0.0061, |
|
"eval_rougeL": 0.0083, |
|
"eval_rougeLsum": 0.0086, |
|
"eval_runtime": 9.4487, |
|
"eval_samples_per_second": 11.642, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 136.98, |
|
"eval_gen_len": 6.4455, |
|
"eval_loss": 0.37981557846069336, |
|
"eval_rouge1": 0.0123, |
|
"eval_rouge2": 0.0095, |
|
"eval_rougeL": 0.0119, |
|
"eval_rougeLsum": 0.0121, |
|
"eval_runtime": 9.4838, |
|
"eval_samples_per_second": 11.599, |
|
"eval_steps_per_second": 1.476, |
|
"step": 3767 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_gen_len": 6.6909, |
|
"eval_loss": 0.3788022994995117, |
|
"eval_rouge1": 0.0138, |
|
"eval_rouge2": 0.01, |
|
"eval_rougeL": 0.0132, |
|
"eval_rougeLsum": 0.0133, |
|
"eval_runtime": 9.4736, |
|
"eval_samples_per_second": 11.611, |
|
"eval_steps_per_second": 1.478, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 138.98, |
|
"eval_gen_len": 6.3909, |
|
"eval_loss": 0.3755718171596527, |
|
"eval_rouge1": 0.0119, |
|
"eval_rouge2": 0.0085, |
|
"eval_rougeL": 0.0116, |
|
"eval_rougeLsum": 0.0116, |
|
"eval_runtime": 9.4922, |
|
"eval_samples_per_second": 11.588, |
|
"eval_steps_per_second": 1.475, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_gen_len": 6.6545, |
|
"eval_loss": 0.3745150864124298, |
|
"eval_rouge1": 0.0135, |
|
"eval_rouge2": 0.0095, |
|
"eval_rougeL": 0.013, |
|
"eval_rougeLsum": 0.013, |
|
"eval_runtime": 9.5082, |
|
"eval_samples_per_second": 11.569, |
|
"eval_steps_per_second": 1.472, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 140.98, |
|
"eval_gen_len": 6.9636, |
|
"eval_loss": 0.37223342061042786, |
|
"eval_rouge1": 0.0175, |
|
"eval_rouge2": 0.0123, |
|
"eval_rougeL": 0.0171, |
|
"eval_rougeLsum": 0.0168, |
|
"eval_runtime": 9.506, |
|
"eval_samples_per_second": 11.572, |
|
"eval_steps_per_second": 1.473, |
|
"step": 3877 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_gen_len": 7.0727, |
|
"eval_loss": 0.36922305822372437, |
|
"eval_rouge1": 0.0188, |
|
"eval_rouge2": 0.0127, |
|
"eval_rougeL": 0.0183, |
|
"eval_rougeLsum": 0.018, |
|
"eval_runtime": 9.4985, |
|
"eval_samples_per_second": 11.581, |
|
"eval_steps_per_second": 1.474, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 142.98, |
|
"eval_gen_len": 7.2727, |
|
"eval_loss": 0.3674834668636322, |
|
"eval_rouge1": 0.0201, |
|
"eval_rouge2": 0.0136, |
|
"eval_rougeL": 0.0197, |
|
"eval_rougeLsum": 0.0194, |
|
"eval_runtime": 9.4928, |
|
"eval_samples_per_second": 11.588, |
|
"eval_steps_per_second": 1.475, |
|
"step": 3932 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_gen_len": 7.1818, |
|
"eval_loss": 0.36526089906692505, |
|
"eval_rouge1": 0.0215, |
|
"eval_rouge2": 0.0139, |
|
"eval_rougeL": 0.0211, |
|
"eval_rougeLsum": 0.0208, |
|
"eval_runtime": 9.4987, |
|
"eval_samples_per_second": 11.581, |
|
"eval_steps_per_second": 1.474, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 144.98, |
|
"eval_gen_len": 7.2, |
|
"eval_loss": 0.36316850781440735, |
|
"eval_rouge1": 0.0209, |
|
"eval_rouge2": 0.0128, |
|
"eval_rougeL": 0.0201, |
|
"eval_rougeLsum": 0.0202, |
|
"eval_runtime": 9.4792, |
|
"eval_samples_per_second": 11.604, |
|
"eval_steps_per_second": 1.477, |
|
"step": 3987 |
|
}, |
|
{ |
|
"epoch": 145.45, |
|
"grad_norm": 0.6945245265960693, |
|
"learning_rate": 6.847736625514404e-06, |
|
"loss": 0.5099, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_gen_len": 7.1364, |
|
"eval_loss": 0.3603822886943817, |
|
"eval_rouge1": 0.022, |
|
"eval_rouge2": 0.0145, |
|
"eval_rougeL": 0.0213, |
|
"eval_rougeLsum": 0.0212, |
|
"eval_runtime": 9.5107, |
|
"eval_samples_per_second": 11.566, |
|
"eval_steps_per_second": 1.472, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 146.98, |
|
"eval_gen_len": 7.1182, |
|
"eval_loss": 0.35853010416030884, |
|
"eval_rouge1": 0.022, |
|
"eval_rouge2": 0.0145, |
|
"eval_rougeL": 0.0213, |
|
"eval_rougeLsum": 0.0212, |
|
"eval_runtime": 9.5074, |
|
"eval_samples_per_second": 11.57, |
|
"eval_steps_per_second": 1.473, |
|
"step": 4042 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_gen_len": 7.7364, |
|
"eval_loss": 0.35745835304260254, |
|
"eval_rouge1": 0.0283, |
|
"eval_rouge2": 0.018, |
|
"eval_rougeL": 0.0269, |
|
"eval_rougeLsum": 0.0269, |
|
"eval_runtime": 9.482, |
|
"eval_samples_per_second": 11.601, |
|
"eval_steps_per_second": 1.476, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 148.98, |
|
"eval_gen_len": 8.3364, |
|
"eval_loss": 0.3559305965900421, |
|
"eval_rouge1": 0.0374, |
|
"eval_rouge2": 0.0236, |
|
"eval_rougeL": 0.0357, |
|
"eval_rougeLsum": 0.0359, |
|
"eval_runtime": 9.4877, |
|
"eval_samples_per_second": 11.594, |
|
"eval_steps_per_second": 1.476, |
|
"step": 4097 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_gen_len": 8.9091, |
|
"eval_loss": 0.3528214395046234, |
|
"eval_rouge1": 0.0384, |
|
"eval_rouge2": 0.0236, |
|
"eval_rougeL": 0.0366, |
|
"eval_rougeLsum": 0.0369, |
|
"eval_runtime": 9.4747, |
|
"eval_samples_per_second": 11.61, |
|
"eval_steps_per_second": 1.478, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 150.98, |
|
"eval_gen_len": 9.4455, |
|
"eval_loss": 0.35084155201911926, |
|
"eval_rouge1": 0.0416, |
|
"eval_rouge2": 0.0254, |
|
"eval_rougeL": 0.0399, |
|
"eval_rougeLsum": 0.0399, |
|
"eval_runtime": 9.4685, |
|
"eval_samples_per_second": 11.618, |
|
"eval_steps_per_second": 1.479, |
|
"step": 4152 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_gen_len": 9.6091, |
|
"eval_loss": 0.34896430373191833, |
|
"eval_rouge1": 0.0439, |
|
"eval_rouge2": 0.0257, |
|
"eval_rougeL": 0.0413, |
|
"eval_rougeLsum": 0.0415, |
|
"eval_runtime": 9.5218, |
|
"eval_samples_per_second": 11.552, |
|
"eval_steps_per_second": 1.47, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 152.98, |
|
"eval_gen_len": 9.7636, |
|
"eval_loss": 0.34783267974853516, |
|
"eval_rouge1": 0.0479, |
|
"eval_rouge2": 0.0297, |
|
"eval_rougeL": 0.045, |
|
"eval_rougeLsum": 0.0454, |
|
"eval_runtime": 9.5136, |
|
"eval_samples_per_second": 11.562, |
|
"eval_steps_per_second": 1.472, |
|
"step": 4207 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_gen_len": 10.1909, |
|
"eval_loss": 0.3452661335468292, |
|
"eval_rouge1": 0.0495, |
|
"eval_rouge2": 0.0291, |
|
"eval_rougeL": 0.0464, |
|
"eval_rougeLsum": 0.0464, |
|
"eval_runtime": 9.5097, |
|
"eval_samples_per_second": 11.567, |
|
"eval_steps_per_second": 1.472, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 154.98, |
|
"eval_gen_len": 10.6, |
|
"eval_loss": 0.34583330154418945, |
|
"eval_rouge1": 0.0576, |
|
"eval_rouge2": 0.035, |
|
"eval_rougeL": 0.055, |
|
"eval_rougeLsum": 0.0551, |
|
"eval_runtime": 9.4851, |
|
"eval_samples_per_second": 11.597, |
|
"eval_steps_per_second": 1.476, |
|
"step": 4262 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_gen_len": 10.2909, |
|
"eval_loss": 0.3417557179927826, |
|
"eval_rouge1": 0.0533, |
|
"eval_rouge2": 0.0314, |
|
"eval_rougeL": 0.0506, |
|
"eval_rougeLsum": 0.0507, |
|
"eval_runtime": 9.474, |
|
"eval_samples_per_second": 11.611, |
|
"eval_steps_per_second": 1.478, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 156.98, |
|
"eval_gen_len": 10.9364, |
|
"eval_loss": 0.3396497666835785, |
|
"eval_rouge1": 0.0591, |
|
"eval_rouge2": 0.0351, |
|
"eval_rougeL": 0.0561, |
|
"eval_rougeLsum": 0.0561, |
|
"eval_runtime": 9.496, |
|
"eval_samples_per_second": 11.584, |
|
"eval_steps_per_second": 1.474, |
|
"step": 4317 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_gen_len": 11.0364, |
|
"eval_loss": 0.3386593759059906, |
|
"eval_rouge1": 0.0633, |
|
"eval_rouge2": 0.0387, |
|
"eval_rougeL": 0.0605, |
|
"eval_rougeLsum": 0.0601, |
|
"eval_runtime": 9.4912, |
|
"eval_samples_per_second": 11.59, |
|
"eval_steps_per_second": 1.475, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 158.98, |
|
"eval_gen_len": 11.4455, |
|
"eval_loss": 0.3368191123008728, |
|
"eval_rouge1": 0.0614, |
|
"eval_rouge2": 0.0371, |
|
"eval_rougeL": 0.0593, |
|
"eval_rougeLsum": 0.0583, |
|
"eval_runtime": 9.5012, |
|
"eval_samples_per_second": 11.578, |
|
"eval_steps_per_second": 1.474, |
|
"step": 4372 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_gen_len": 11.4545, |
|
"eval_loss": 0.33700016140937805, |
|
"eval_rouge1": 0.0702, |
|
"eval_rouge2": 0.0444, |
|
"eval_rougeL": 0.0672, |
|
"eval_rougeLsum": 0.0671, |
|
"eval_runtime": 9.4911, |
|
"eval_samples_per_second": 11.59, |
|
"eval_steps_per_second": 1.475, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 160.98, |
|
"eval_gen_len": 11.4182, |
|
"eval_loss": 0.3347805440425873, |
|
"eval_rouge1": 0.0702, |
|
"eval_rouge2": 0.0444, |
|
"eval_rougeL": 0.0672, |
|
"eval_rougeLsum": 0.0671, |
|
"eval_runtime": 9.4829, |
|
"eval_samples_per_second": 11.6, |
|
"eval_steps_per_second": 1.476, |
|
"step": 4427 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_gen_len": 11.2818, |
|
"eval_loss": 0.332653284072876, |
|
"eval_rouge1": 0.0691, |
|
"eval_rouge2": 0.0438, |
|
"eval_rougeL": 0.0661, |
|
"eval_rougeLsum": 0.0656, |
|
"eval_runtime": 9.5355, |
|
"eval_samples_per_second": 11.536, |
|
"eval_steps_per_second": 1.468, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 162.98, |
|
"eval_gen_len": 11.3545, |
|
"eval_loss": 0.3307643234729767, |
|
"eval_rouge1": 0.0648, |
|
"eval_rouge2": 0.0405, |
|
"eval_rougeL": 0.0614, |
|
"eval_rougeLsum": 0.0609, |
|
"eval_runtime": 9.4851, |
|
"eval_samples_per_second": 11.597, |
|
"eval_steps_per_second": 1.476, |
|
"step": 4482 |
|
}, |
|
{ |
|
"epoch": 163.64, |
|
"grad_norm": 0.6090702414512634, |
|
"learning_rate": 5.20164609053498e-06, |
|
"loss": 0.4471, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_gen_len": 11.7909, |
|
"eval_loss": 0.32992881536483765, |
|
"eval_rouge1": 0.0711, |
|
"eval_rouge2": 0.0441, |
|
"eval_rougeL": 0.0677, |
|
"eval_rougeLsum": 0.0667, |
|
"eval_runtime": 9.5142, |
|
"eval_samples_per_second": 11.562, |
|
"eval_steps_per_second": 1.471, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 164.98, |
|
"eval_gen_len": 12.0273, |
|
"eval_loss": 0.3291892111301422, |
|
"eval_rouge1": 0.0749, |
|
"eval_rouge2": 0.0476, |
|
"eval_rougeL": 0.0709, |
|
"eval_rougeLsum": 0.0701, |
|
"eval_runtime": 9.5232, |
|
"eval_samples_per_second": 11.551, |
|
"eval_steps_per_second": 1.47, |
|
"step": 4537 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_gen_len": 12.4364, |
|
"eval_loss": 0.3260752558708191, |
|
"eval_rouge1": 0.078, |
|
"eval_rouge2": 0.0484, |
|
"eval_rougeL": 0.0728, |
|
"eval_rougeLsum": 0.0724, |
|
"eval_runtime": 9.5359, |
|
"eval_samples_per_second": 11.535, |
|
"eval_steps_per_second": 1.468, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 166.98, |
|
"eval_gen_len": 12.6636, |
|
"eval_loss": 0.32543399930000305, |
|
"eval_rouge1": 0.0865, |
|
"eval_rouge2": 0.0555, |
|
"eval_rougeL": 0.0812, |
|
"eval_rougeLsum": 0.0806, |
|
"eval_runtime": 9.5501, |
|
"eval_samples_per_second": 11.518, |
|
"eval_steps_per_second": 1.466, |
|
"step": 4592 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_gen_len": 12.8455, |
|
"eval_loss": 0.3237576484680176, |
|
"eval_rouge1": 0.081, |
|
"eval_rouge2": 0.0501, |
|
"eval_rougeL": 0.0763, |
|
"eval_rougeLsum": 0.0754, |
|
"eval_runtime": 9.5386, |
|
"eval_samples_per_second": 11.532, |
|
"eval_steps_per_second": 1.468, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 168.98, |
|
"eval_gen_len": 13.1273, |
|
"eval_loss": 0.32304662466049194, |
|
"eval_rouge1": 0.0899, |
|
"eval_rouge2": 0.0584, |
|
"eval_rougeL": 0.0846, |
|
"eval_rougeLsum": 0.0836, |
|
"eval_runtime": 9.4963, |
|
"eval_samples_per_second": 11.583, |
|
"eval_steps_per_second": 1.474, |
|
"step": 4647 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"eval_gen_len": 13.5364, |
|
"eval_loss": 0.3217833638191223, |
|
"eval_rouge1": 0.0936, |
|
"eval_rouge2": 0.0579, |
|
"eval_rougeL": 0.0864, |
|
"eval_rougeLsum": 0.0854, |
|
"eval_runtime": 9.5121, |
|
"eval_samples_per_second": 11.564, |
|
"eval_steps_per_second": 1.472, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 170.98, |
|
"eval_gen_len": 13.9455, |
|
"eval_loss": 0.3209010660648346, |
|
"eval_rouge1": 0.0976, |
|
"eval_rouge2": 0.063, |
|
"eval_rougeL": 0.0914, |
|
"eval_rougeLsum": 0.0904, |
|
"eval_runtime": 9.507, |
|
"eval_samples_per_second": 11.57, |
|
"eval_steps_per_second": 1.473, |
|
"step": 4702 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_gen_len": 14.2545, |
|
"eval_loss": 0.31976738572120667, |
|
"eval_rouge1": 0.1024, |
|
"eval_rouge2": 0.0663, |
|
"eval_rougeL": 0.0959, |
|
"eval_rougeLsum": 0.0946, |
|
"eval_runtime": 9.527, |
|
"eval_samples_per_second": 11.546, |
|
"eval_steps_per_second": 1.47, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 172.98, |
|
"eval_gen_len": 13.7909, |
|
"eval_loss": 0.31930533051490784, |
|
"eval_rouge1": 0.0943, |
|
"eval_rouge2": 0.0596, |
|
"eval_rougeL": 0.0883, |
|
"eval_rougeLsum": 0.0873, |
|
"eval_runtime": 9.5147, |
|
"eval_samples_per_second": 11.561, |
|
"eval_steps_per_second": 1.471, |
|
"step": 4757 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"eval_gen_len": 13.9182, |
|
"eval_loss": 0.3177041709423065, |
|
"eval_rouge1": 0.0964, |
|
"eval_rouge2": 0.0605, |
|
"eval_rougeL": 0.0905, |
|
"eval_rougeLsum": 0.0896, |
|
"eval_runtime": 9.4816, |
|
"eval_samples_per_second": 11.601, |
|
"eval_steps_per_second": 1.477, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 174.98, |
|
"eval_gen_len": 13.4636, |
|
"eval_loss": 0.3157893121242523, |
|
"eval_rouge1": 0.0978, |
|
"eval_rouge2": 0.0628, |
|
"eval_rougeL": 0.0918, |
|
"eval_rougeLsum": 0.0905, |
|
"eval_runtime": 9.4906, |
|
"eval_samples_per_second": 11.59, |
|
"eval_steps_per_second": 1.475, |
|
"step": 4812 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_gen_len": 14.3273, |
|
"eval_loss": 0.3147883713245392, |
|
"eval_rouge1": 0.1021, |
|
"eval_rouge2": 0.0643, |
|
"eval_rougeL": 0.0951, |
|
"eval_rougeLsum": 0.0943, |
|
"eval_runtime": 9.5161, |
|
"eval_samples_per_second": 11.559, |
|
"eval_steps_per_second": 1.471, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 176.98, |
|
"eval_gen_len": 14.3727, |
|
"eval_loss": 0.31470227241516113, |
|
"eval_rouge1": 0.1043, |
|
"eval_rouge2": 0.0661, |
|
"eval_rougeL": 0.0982, |
|
"eval_rougeLsum": 0.0972, |
|
"eval_runtime": 9.5231, |
|
"eval_samples_per_second": 11.551, |
|
"eval_steps_per_second": 1.47, |
|
"step": 4867 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"eval_gen_len": 14.4455, |
|
"eval_loss": 0.31245002150535583, |
|
"eval_rouge1": 0.1068, |
|
"eval_rouge2": 0.0691, |
|
"eval_rougeL": 0.1016, |
|
"eval_rougeLsum": 0.1001, |
|
"eval_runtime": 9.5249, |
|
"eval_samples_per_second": 11.549, |
|
"eval_steps_per_second": 1.47, |
|
"step": 4895 |
|
}, |
|
{ |
|
"epoch": 178.98, |
|
"eval_gen_len": 14.7091, |
|
"eval_loss": 0.3121263384819031, |
|
"eval_rouge1": 0.1073, |
|
"eval_rouge2": 0.0682, |
|
"eval_rougeL": 0.1012, |
|
"eval_rougeLsum": 0.1, |
|
"eval_runtime": 9.531, |
|
"eval_samples_per_second": 11.541, |
|
"eval_steps_per_second": 1.469, |
|
"step": 4922 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_gen_len": 14.7182, |
|
"eval_loss": 0.310048907995224, |
|
"eval_rouge1": 0.1095, |
|
"eval_rouge2": 0.0698, |
|
"eval_rougeL": 0.1038, |
|
"eval_rougeLsum": 0.1022, |
|
"eval_runtime": 9.5031, |
|
"eval_samples_per_second": 11.575, |
|
"eval_steps_per_second": 1.473, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 180.98, |
|
"eval_gen_len": 14.5727, |
|
"eval_loss": 0.3098377585411072, |
|
"eval_rouge1": 0.1122, |
|
"eval_rouge2": 0.0717, |
|
"eval_rougeL": 0.1054, |
|
"eval_rougeLsum": 0.1048, |
|
"eval_runtime": 9.5178, |
|
"eval_samples_per_second": 11.557, |
|
"eval_steps_per_second": 1.471, |
|
"step": 4977 |
|
}, |
|
{ |
|
"epoch": 181.82, |
|
"grad_norm": 0.6373523473739624, |
|
"learning_rate": 3.555555555555556e-06, |
|
"loss": 0.4093, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"eval_gen_len": 14.8182, |
|
"eval_loss": 0.3088683784008026, |
|
"eval_rouge1": 0.1148, |
|
"eval_rouge2": 0.074, |
|
"eval_rougeL": 0.1082, |
|
"eval_rougeLsum": 0.1071, |
|
"eval_runtime": 9.5147, |
|
"eval_samples_per_second": 11.561, |
|
"eval_steps_per_second": 1.471, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 182.98, |
|
"eval_gen_len": 14.6727, |
|
"eval_loss": 0.30828168988227844, |
|
"eval_rouge1": 0.1088, |
|
"eval_rouge2": 0.0689, |
|
"eval_rougeL": 0.1021, |
|
"eval_rougeLsum": 0.1014, |
|
"eval_runtime": 9.5092, |
|
"eval_samples_per_second": 11.568, |
|
"eval_steps_per_second": 1.472, |
|
"step": 5032 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_gen_len": 15.2182, |
|
"eval_loss": 0.30716758966445923, |
|
"eval_rouge1": 0.1167, |
|
"eval_rouge2": 0.0746, |
|
"eval_rougeL": 0.1098, |
|
"eval_rougeLsum": 0.1084, |
|
"eval_runtime": 9.5556, |
|
"eval_samples_per_second": 11.512, |
|
"eval_steps_per_second": 1.465, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 184.98, |
|
"eval_gen_len": 15.9364, |
|
"eval_loss": 0.3059370815753937, |
|
"eval_rouge1": 0.1233, |
|
"eval_rouge2": 0.08, |
|
"eval_rougeL": 0.1166, |
|
"eval_rougeLsum": 0.1157, |
|
"eval_runtime": 9.5537, |
|
"eval_samples_per_second": 11.514, |
|
"eval_steps_per_second": 1.465, |
|
"step": 5087 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"eval_gen_len": 15.1727, |
|
"eval_loss": 0.3056795597076416, |
|
"eval_rouge1": 0.1128, |
|
"eval_rouge2": 0.0707, |
|
"eval_rougeL": 0.1055, |
|
"eval_rougeLsum": 0.1049, |
|
"eval_runtime": 9.5621, |
|
"eval_samples_per_second": 11.504, |
|
"eval_steps_per_second": 1.464, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 186.98, |
|
"eval_gen_len": 15.1818, |
|
"eval_loss": 0.3043256402015686, |
|
"eval_rouge1": 0.1131, |
|
"eval_rouge2": 0.0707, |
|
"eval_rougeL": 0.1057, |
|
"eval_rougeLsum": 0.105, |
|
"eval_runtime": 9.5545, |
|
"eval_samples_per_second": 11.513, |
|
"eval_steps_per_second": 1.465, |
|
"step": 5142 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_gen_len": 15.1727, |
|
"eval_loss": 0.30425599217414856, |
|
"eval_rouge1": 0.1125, |
|
"eval_rouge2": 0.0703, |
|
"eval_rougeL": 0.1052, |
|
"eval_rougeLsum": 0.1046, |
|
"eval_runtime": 9.5018, |
|
"eval_samples_per_second": 11.577, |
|
"eval_steps_per_second": 1.473, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 188.98, |
|
"eval_gen_len": 15.1636, |
|
"eval_loss": 0.30395984649658203, |
|
"eval_rouge1": 0.1128, |
|
"eval_rouge2": 0.0705, |
|
"eval_rougeL": 0.1054, |
|
"eval_rougeLsum": 0.1049, |
|
"eval_runtime": 9.5213, |
|
"eval_samples_per_second": 11.553, |
|
"eval_steps_per_second": 1.47, |
|
"step": 5197 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"eval_gen_len": 15.2455, |
|
"eval_loss": 0.30324217677116394, |
|
"eval_rouge1": 0.1136, |
|
"eval_rouge2": 0.0705, |
|
"eval_rougeL": 0.1061, |
|
"eval_rougeLsum": 0.1056, |
|
"eval_runtime": 9.5432, |
|
"eval_samples_per_second": 11.527, |
|
"eval_steps_per_second": 1.467, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 190.98, |
|
"eval_gen_len": 15.6182, |
|
"eval_loss": 0.30257585644721985, |
|
"eval_rouge1": 0.1149, |
|
"eval_rouge2": 0.071, |
|
"eval_rougeL": 0.1075, |
|
"eval_rougeLsum": 0.107, |
|
"eval_runtime": 9.5398, |
|
"eval_samples_per_second": 11.531, |
|
"eval_steps_per_second": 1.468, |
|
"step": 5252 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_gen_len": 15.7545, |
|
"eval_loss": 0.301755428314209, |
|
"eval_rouge1": 0.118, |
|
"eval_rouge2": 0.0744, |
|
"eval_rougeL": 0.1114, |
|
"eval_rougeLsum": 0.1105, |
|
"eval_runtime": 9.5491, |
|
"eval_samples_per_second": 11.519, |
|
"eval_steps_per_second": 1.466, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 192.98, |
|
"eval_gen_len": 15.7545, |
|
"eval_loss": 0.30100175738334656, |
|
"eval_rouge1": 0.1186, |
|
"eval_rouge2": 0.0756, |
|
"eval_rougeL": 0.1122, |
|
"eval_rougeLsum": 0.1116, |
|
"eval_runtime": 9.5605, |
|
"eval_samples_per_second": 11.506, |
|
"eval_steps_per_second": 1.464, |
|
"step": 5307 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"eval_gen_len": 15.6727, |
|
"eval_loss": 0.3014240562915802, |
|
"eval_rouge1": 0.1169, |
|
"eval_rouge2": 0.0738, |
|
"eval_rougeL": 0.1106, |
|
"eval_rougeLsum": 0.1094, |
|
"eval_runtime": 9.5649, |
|
"eval_samples_per_second": 11.5, |
|
"eval_steps_per_second": 1.464, |
|
"step": 5335 |
|
}, |
|
{ |
|
"epoch": 194.98, |
|
"eval_gen_len": 15.6364, |
|
"eval_loss": 0.3001127541065216, |
|
"eval_rouge1": 0.1161, |
|
"eval_rouge2": 0.0734, |
|
"eval_rougeL": 0.1104, |
|
"eval_rougeLsum": 0.1092, |
|
"eval_runtime": 9.558, |
|
"eval_samples_per_second": 11.509, |
|
"eval_steps_per_second": 1.465, |
|
"step": 5362 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_gen_len": 15.7909, |
|
"eval_loss": 0.29918792843818665, |
|
"eval_rouge1": 0.1192, |
|
"eval_rouge2": 0.0752, |
|
"eval_rougeL": 0.1132, |
|
"eval_rougeLsum": 0.1125, |
|
"eval_runtime": 9.5635, |
|
"eval_samples_per_second": 11.502, |
|
"eval_steps_per_second": 1.464, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 196.98, |
|
"eval_gen_len": 15.6364, |
|
"eval_loss": 0.2991277277469635, |
|
"eval_rouge1": 0.1205, |
|
"eval_rouge2": 0.0772, |
|
"eval_rougeL": 0.1141, |
|
"eval_rougeLsum": 0.1133, |
|
"eval_runtime": 9.524, |
|
"eval_samples_per_second": 11.55, |
|
"eval_steps_per_second": 1.47, |
|
"step": 5417 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"eval_gen_len": 15.7909, |
|
"eval_loss": 0.2986967861652374, |
|
"eval_rouge1": 0.1202, |
|
"eval_rouge2": 0.0768, |
|
"eval_rougeL": 0.1143, |
|
"eval_rougeLsum": 0.1138, |
|
"eval_runtime": 9.5106, |
|
"eval_samples_per_second": 11.566, |
|
"eval_steps_per_second": 1.472, |
|
"step": 5445 |
|
}, |
|
{ |
|
"epoch": 198.98, |
|
"eval_gen_len": 15.6182, |
|
"eval_loss": 0.2981720566749573, |
|
"eval_rouge1": 0.1217, |
|
"eval_rouge2": 0.0793, |
|
"eval_rougeL": 0.1153, |
|
"eval_rougeLsum": 0.1148, |
|
"eval_runtime": 9.5676, |
|
"eval_samples_per_second": 11.497, |
|
"eval_steps_per_second": 1.463, |
|
"step": 5472 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"grad_norm": 1.2200640439987183, |
|
"learning_rate": 1.909465020576132e-06, |
|
"loss": 0.3864, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_gen_len": 15.7818, |
|
"eval_loss": 0.29755449295043945, |
|
"eval_rouge1": 0.1218, |
|
"eval_rouge2": 0.079, |
|
"eval_rougeL": 0.1154, |
|
"eval_rougeLsum": 0.1147, |
|
"eval_runtime": 9.547, |
|
"eval_samples_per_second": 11.522, |
|
"eval_steps_per_second": 1.466, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 200.98, |
|
"eval_gen_len": 15.6182, |
|
"eval_loss": 0.29704758524894714, |
|
"eval_rouge1": 0.1219, |
|
"eval_rouge2": 0.0811, |
|
"eval_rougeL": 0.1163, |
|
"eval_rougeLsum": 0.116, |
|
"eval_runtime": 9.5552, |
|
"eval_samples_per_second": 11.512, |
|
"eval_steps_per_second": 1.465, |
|
"step": 5527 |
|
}, |
|
{ |
|
"epoch": 202.0, |
|
"eval_gen_len": 15.5273, |
|
"eval_loss": 0.29719075560569763, |
|
"eval_rouge1": 0.122, |
|
"eval_rouge2": 0.0811, |
|
"eval_rougeL": 0.116, |
|
"eval_rougeLsum": 0.1157, |
|
"eval_runtime": 9.5815, |
|
"eval_samples_per_second": 11.48, |
|
"eval_steps_per_second": 1.461, |
|
"step": 5555 |
|
}, |
|
{ |
|
"epoch": 202.98, |
|
"eval_gen_len": 15.6909, |
|
"eval_loss": 0.29677239060401917, |
|
"eval_rouge1": 0.1209, |
|
"eval_rouge2": 0.0803, |
|
"eval_rougeL": 0.1156, |
|
"eval_rougeLsum": 0.1149, |
|
"eval_runtime": 9.499, |
|
"eval_samples_per_second": 11.58, |
|
"eval_steps_per_second": 1.474, |
|
"step": 5582 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_gen_len": 15.5273, |
|
"eval_loss": 0.2963531017303467, |
|
"eval_rouge1": 0.1251, |
|
"eval_rouge2": 0.0846, |
|
"eval_rougeL": 0.1207, |
|
"eval_rougeLsum": 0.1194, |
|
"eval_runtime": 9.5143, |
|
"eval_samples_per_second": 11.562, |
|
"eval_steps_per_second": 1.471, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 204.98, |
|
"eval_gen_len": 15.6909, |
|
"eval_loss": 0.29531627893447876, |
|
"eval_rouge1": 0.1239, |
|
"eval_rouge2": 0.0831, |
|
"eval_rougeL": 0.1193, |
|
"eval_rougeLsum": 0.1184, |
|
"eval_runtime": 9.5198, |
|
"eval_samples_per_second": 11.555, |
|
"eval_steps_per_second": 1.471, |
|
"step": 5637 |
|
}, |
|
{ |
|
"epoch": 206.0, |
|
"eval_gen_len": 15.5273, |
|
"eval_loss": 0.29536357522010803, |
|
"eval_rouge1": 0.1236, |
|
"eval_rouge2": 0.0835, |
|
"eval_rougeL": 0.1192, |
|
"eval_rougeLsum": 0.1182, |
|
"eval_runtime": 9.5066, |
|
"eval_samples_per_second": 11.571, |
|
"eval_steps_per_second": 1.473, |
|
"step": 5665 |
|
}, |
|
{ |
|
"epoch": 206.98, |
|
"eval_gen_len": 15.6727, |
|
"eval_loss": 0.2951861023902893, |
|
"eval_rouge1": 0.1236, |
|
"eval_rouge2": 0.0832, |
|
"eval_rougeL": 0.1191, |
|
"eval_rougeLsum": 0.1181, |
|
"eval_runtime": 9.5179, |
|
"eval_samples_per_second": 11.557, |
|
"eval_steps_per_second": 1.471, |
|
"step": 5692 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_gen_len": 15.8273, |
|
"eval_loss": 0.29457393288612366, |
|
"eval_rouge1": 0.1262, |
|
"eval_rouge2": 0.0856, |
|
"eval_rougeL": 0.1223, |
|
"eval_rougeLsum": 0.121, |
|
"eval_runtime": 9.5235, |
|
"eval_samples_per_second": 11.55, |
|
"eval_steps_per_second": 1.47, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 208.98, |
|
"eval_gen_len": 15.8273, |
|
"eval_loss": 0.29461580514907837, |
|
"eval_rouge1": 0.1269, |
|
"eval_rouge2": 0.086, |
|
"eval_rougeL": 0.1227, |
|
"eval_rougeLsum": 0.1213, |
|
"eval_runtime": 9.5193, |
|
"eval_samples_per_second": 11.556, |
|
"eval_steps_per_second": 1.471, |
|
"step": 5747 |
|
}, |
|
{ |
|
"epoch": 210.0, |
|
"eval_gen_len": 15.6727, |
|
"eval_loss": 0.2948046028614044, |
|
"eval_rouge1": 0.1261, |
|
"eval_rouge2": 0.0859, |
|
"eval_rougeL": 0.1221, |
|
"eval_rougeLsum": 0.1208, |
|
"eval_runtime": 9.5297, |
|
"eval_samples_per_second": 11.543, |
|
"eval_steps_per_second": 1.469, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 210.98, |
|
"eval_gen_len": 15.7636, |
|
"eval_loss": 0.29468077421188354, |
|
"eval_rouge1": 0.129, |
|
"eval_rouge2": 0.0888, |
|
"eval_rougeL": 0.1244, |
|
"eval_rougeLsum": 0.1235, |
|
"eval_runtime": 9.5285, |
|
"eval_samples_per_second": 11.544, |
|
"eval_steps_per_second": 1.469, |
|
"step": 5802 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_gen_len": 15.9091, |
|
"eval_loss": 0.2943172752857208, |
|
"eval_rouge1": 0.1308, |
|
"eval_rouge2": 0.0909, |
|
"eval_rougeL": 0.1267, |
|
"eval_rougeLsum": 0.1254, |
|
"eval_runtime": 9.5333, |
|
"eval_samples_per_second": 11.538, |
|
"eval_steps_per_second": 1.469, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 212.98, |
|
"eval_gen_len": 15.9091, |
|
"eval_loss": 0.2938406467437744, |
|
"eval_rouge1": 0.1293, |
|
"eval_rouge2": 0.0888, |
|
"eval_rougeL": 0.1251, |
|
"eval_rougeLsum": 0.1236, |
|
"eval_runtime": 9.5369, |
|
"eval_samples_per_second": 11.534, |
|
"eval_steps_per_second": 1.468, |
|
"step": 5857 |
|
}, |
|
{ |
|
"epoch": 214.0, |
|
"eval_gen_len": 15.9727, |
|
"eval_loss": 0.2933821678161621, |
|
"eval_rouge1": 0.1284, |
|
"eval_rouge2": 0.0877, |
|
"eval_rougeL": 0.1243, |
|
"eval_rougeLsum": 0.123, |
|
"eval_runtime": 9.5327, |
|
"eval_samples_per_second": 11.539, |
|
"eval_steps_per_second": 1.469, |
|
"step": 5885 |
|
}, |
|
{ |
|
"epoch": 214.98, |
|
"eval_gen_len": 15.8727, |
|
"eval_loss": 0.2933785617351532, |
|
"eval_rouge1": 0.1262, |
|
"eval_rouge2": 0.0855, |
|
"eval_rougeL": 0.1221, |
|
"eval_rougeLsum": 0.1208, |
|
"eval_runtime": 9.542, |
|
"eval_samples_per_second": 11.528, |
|
"eval_steps_per_second": 1.467, |
|
"step": 5912 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_gen_len": 15.8727, |
|
"eval_loss": 0.29337677359580994, |
|
"eval_rouge1": 0.1267, |
|
"eval_rouge2": 0.0863, |
|
"eval_rougeL": 0.1225, |
|
"eval_rougeLsum": 0.1214, |
|
"eval_runtime": 9.5387, |
|
"eval_samples_per_second": 11.532, |
|
"eval_steps_per_second": 1.468, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 216.98, |
|
"eval_gen_len": 15.9636, |
|
"eval_loss": 0.2933517396450043, |
|
"eval_rouge1": 0.1292, |
|
"eval_rouge2": 0.0894, |
|
"eval_rougeL": 0.1254, |
|
"eval_rougeLsum": 0.1241, |
|
"eval_runtime": 9.5629, |
|
"eval_samples_per_second": 11.503, |
|
"eval_steps_per_second": 1.464, |
|
"step": 5967 |
|
}, |
|
{ |
|
"epoch": 218.0, |
|
"eval_gen_len": 15.9636, |
|
"eval_loss": 0.29335835576057434, |
|
"eval_rouge1": 0.1289, |
|
"eval_rouge2": 0.0889, |
|
"eval_rougeL": 0.1249, |
|
"eval_rougeLsum": 0.1234, |
|
"eval_runtime": 9.5531, |
|
"eval_samples_per_second": 11.515, |
|
"eval_steps_per_second": 1.465, |
|
"step": 5995 |
|
}, |
|
{ |
|
"epoch": 218.18, |
|
"grad_norm": 0.6804682016372681, |
|
"learning_rate": 2.6337448559670784e-07, |
|
"loss": 0.3747, |
|
"step": 6000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6075, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 225, |
|
"save_steps": 500, |
|
"total_flos": 1.1611644167297434e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|