{ "best_metric": null, "best_model_checkpoint": null, "epoch": 218.1818181818182, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.98, "eval_gen_len": 14.9909, "eval_loss": 21.427785873413086, "eval_rouge1": 0.0931, "eval_rouge2": 0.021, "eval_rougeL": 0.0719, "eval_rougeLsum": 0.0715, "eval_runtime": 11.4039, "eval_samples_per_second": 9.646, "eval_steps_per_second": 1.228, "step": 27 }, { "epoch": 2.0, "eval_gen_len": 14.9909, "eval_loss": 21.134571075439453, "eval_rouge1": 0.0948, "eval_rouge2": 0.0238, "eval_rougeL": 0.073, "eval_rougeLsum": 0.0724, "eval_runtime": 9.5984, "eval_samples_per_second": 11.46, "eval_steps_per_second": 1.459, "step": 55 }, { "epoch": 2.98, "eval_gen_len": 14.9909, "eval_loss": 20.702985763549805, "eval_rouge1": 0.0917, "eval_rouge2": 0.0203, "eval_rougeL": 0.0707, "eval_rougeLsum": 0.0703, "eval_runtime": 9.5997, "eval_samples_per_second": 11.459, "eval_steps_per_second": 1.458, "step": 82 }, { "epoch": 4.0, "eval_gen_len": 15.3455, "eval_loss": 20.22304344177246, "eval_rouge1": 0.0991, "eval_rouge2": 0.0246, "eval_rougeL": 0.0781, "eval_rougeLsum": 0.0778, "eval_runtime": 9.5876, "eval_samples_per_second": 11.473, "eval_steps_per_second": 1.46, "step": 110 }, { "epoch": 4.98, "eval_gen_len": 15.4818, "eval_loss": 19.888574600219727, "eval_rouge1": 0.1057, "eval_rouge2": 0.0297, "eval_rougeL": 0.0845, "eval_rougeLsum": 0.0845, "eval_runtime": 9.549, "eval_samples_per_second": 11.519, "eval_steps_per_second": 1.466, "step": 137 }, { "epoch": 6.0, "eval_gen_len": 15.7182, "eval_loss": 19.539011001586914, "eval_rouge1": 0.1034, "eval_rouge2": 0.0276, "eval_rougeL": 0.084, "eval_rougeLsum": 0.0839, "eval_runtime": 9.551, "eval_samples_per_second": 11.517, "eval_steps_per_second": 1.466, "step": 165 }, { "epoch": 6.98, "eval_gen_len": 15.8727, "eval_loss": 19.162174224853516, "eval_rouge1": 0.1023, "eval_rouge2": 0.0303, "eval_rougeL": 0.0837, "eval_rougeLsum": 0.0835, "eval_runtime": 9.5511, "eval_samples_per_second": 11.517, "eval_steps_per_second": 1.466, "step": 192 }, { "epoch": 8.0, "eval_gen_len": 15.5091, "eval_loss": 18.730688095092773, "eval_rouge1": 0.1034, "eval_rouge2": 0.0342, "eval_rougeL": 0.0832, "eval_rougeLsum": 0.083, "eval_runtime": 9.5901, "eval_samples_per_second": 11.47, "eval_steps_per_second": 1.46, "step": 220 }, { "epoch": 8.98, "eval_gen_len": 15.8, "eval_loss": 18.190088272094727, "eval_rouge1": 0.0969, "eval_rouge2": 0.0344, "eval_rougeL": 0.0818, "eval_rougeLsum": 0.0815, "eval_runtime": 9.5799, "eval_samples_per_second": 11.482, "eval_steps_per_second": 1.461, "step": 247 }, { "epoch": 10.0, "eval_gen_len": 15.9455, "eval_loss": 17.473072052001953, "eval_rouge1": 0.1041, "eval_rouge2": 0.0337, "eval_rougeL": 0.0857, "eval_rougeLsum": 0.0853, "eval_runtime": 9.5524, "eval_samples_per_second": 11.515, "eval_steps_per_second": 1.466, "step": 275 }, { "epoch": 10.98, "eval_gen_len": 18.0182, "eval_loss": 16.60153579711914, "eval_rouge1": 0.1001, "eval_rouge2": 0.029, "eval_rougeL": 0.0828, "eval_rougeLsum": 0.0828, "eval_runtime": 9.5623, "eval_samples_per_second": 11.503, "eval_steps_per_second": 1.464, "step": 302 }, { "epoch": 12.0, "eval_gen_len": 16.9636, "eval_loss": 15.59052848815918, "eval_rouge1": 0.0875, "eval_rouge2": 0.0222, "eval_rougeL": 0.0724, "eval_rougeLsum": 0.0724, "eval_runtime": 9.5436, "eval_samples_per_second": 11.526, "eval_steps_per_second": 1.467, "step": 330 }, { "epoch": 12.98, "eval_gen_len": 15.1455, "eval_loss": 14.362702369689941, "eval_rouge1": 0.0657, "eval_rouge2": 0.0153, "eval_rougeL": 0.0545, "eval_rougeLsum": 0.0543, "eval_runtime": 9.5681, "eval_samples_per_second": 11.497, "eval_steps_per_second": 1.463, "step": 357 }, { "epoch": 14.0, "eval_gen_len": 13.2273, "eval_loss": 12.995957374572754, "eval_rouge1": 0.0449, "eval_rouge2": 0.0083, "eval_rougeL": 0.0363, "eval_rougeLsum": 0.0361, "eval_runtime": 9.506, "eval_samples_per_second": 11.572, "eval_steps_per_second": 1.473, "step": 385 }, { "epoch": 14.98, "eval_gen_len": 13.1818, "eval_loss": 11.857050895690918, "eval_rouge1": 0.0337, "eval_rouge2": 0.0071, "eval_rougeL": 0.0303, "eval_rougeLsum": 0.0304, "eval_runtime": 9.5174, "eval_samples_per_second": 11.558, "eval_steps_per_second": 1.471, "step": 412 }, { "epoch": 16.0, "eval_gen_len": 12.0545, "eval_loss": 10.728998184204102, "eval_rouge1": 0.0137, "eval_rouge2": 0.002, "eval_rougeL": 0.0114, "eval_rougeLsum": 0.0112, "eval_runtime": 9.5263, "eval_samples_per_second": 11.547, "eval_steps_per_second": 1.47, "step": 440 }, { "epoch": 16.98, "eval_gen_len": 14.0818, "eval_loss": 9.675826072692871, "eval_rouge1": 0.0051, "eval_rouge2": 0.0007, "eval_rougeL": 0.005, "eval_rougeLsum": 0.0049, "eval_runtime": 9.4916, "eval_samples_per_second": 11.589, "eval_steps_per_second": 1.475, "step": 467 }, { "epoch": 18.0, "eval_gen_len": 17.2545, "eval_loss": 8.581353187561035, "eval_rouge1": 0.0013, "eval_rouge2": 0.0, "eval_rougeL": 0.0013, "eval_rougeLsum": 0.0013, "eval_runtime": 9.4979, "eval_samples_per_second": 11.581, "eval_steps_per_second": 1.474, "step": 495 }, { "epoch": 18.18, "grad_norm": 5.896080017089844, "learning_rate": 1.8360493827160496e-05, "loss": 16.7039, "step": 500 }, { "epoch": 18.98, "eval_gen_len": 17.5455, "eval_loss": 7.5404815673828125, "eval_rouge1": 0.0015, "eval_rouge2": 0.0, "eval_rougeL": 0.0014, "eval_rougeLsum": 0.0014, "eval_runtime": 9.5287, "eval_samples_per_second": 11.544, "eval_steps_per_second": 1.469, "step": 522 }, { "epoch": 20.0, "eval_gen_len": 18.5636, "eval_loss": 6.503509044647217, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.5264, "eval_samples_per_second": 11.547, "eval_steps_per_second": 1.47, "step": 550 }, { "epoch": 20.98, "eval_gen_len": 19.0, "eval_loss": 5.559322834014893, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.5196, "eval_samples_per_second": 11.555, "eval_steps_per_second": 1.471, "step": 577 }, { "epoch": 22.0, "eval_gen_len": 19.0, "eval_loss": 4.715544700622559, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.5225, "eval_samples_per_second": 11.552, "eval_steps_per_second": 1.47, "step": 605 }, { "epoch": 22.98, "eval_gen_len": 19.0, "eval_loss": 4.022518634796143, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.5406, "eval_samples_per_second": 11.53, "eval_steps_per_second": 1.467, "step": 632 }, { "epoch": 24.0, "eval_gen_len": 19.0, "eval_loss": 3.401372194290161, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.5371, "eval_samples_per_second": 11.534, "eval_steps_per_second": 1.468, "step": 660 }, { "epoch": 24.98, "eval_gen_len": 19.0, "eval_loss": 2.971496105194092, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5401, "eval_samples_per_second": 11.53, "eval_steps_per_second": 1.467, "step": 687 }, { "epoch": 26.0, "eval_gen_len": 18.7273, "eval_loss": 2.6538097858428955, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.5288, "eval_samples_per_second": 11.544, "eval_steps_per_second": 1.469, "step": 715 }, { "epoch": 26.98, "eval_gen_len": 14.5, "eval_loss": 2.4105889797210693, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.478, "eval_samples_per_second": 11.606, "eval_steps_per_second": 1.477, "step": 742 }, { "epoch": 28.0, "eval_gen_len": 8.2636, "eval_loss": 2.2297565937042236, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4449, "eval_samples_per_second": 11.646, "eval_steps_per_second": 1.482, "step": 770 }, { "epoch": 28.98, "eval_gen_len": 7.1455, "eval_loss": 2.1042511463165283, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4539, "eval_samples_per_second": 11.635, "eval_steps_per_second": 1.481, "step": 797 }, { "epoch": 30.0, "eval_gen_len": 6.5545, "eval_loss": 2.011505126953125, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4645, "eval_samples_per_second": 11.622, "eval_steps_per_second": 1.479, "step": 825 }, { "epoch": 30.98, "eval_gen_len": 6.0273, "eval_loss": 1.927019715309143, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5048, "eval_samples_per_second": 11.573, "eval_steps_per_second": 1.473, "step": 852 }, { "epoch": 32.0, "eval_gen_len": 5.4091, "eval_loss": 1.8653310537338257, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.49, "eval_samples_per_second": 11.591, "eval_steps_per_second": 1.475, "step": 880 }, { "epoch": 32.98, "eval_gen_len": 5.1727, "eval_loss": 1.8196451663970947, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4647, "eval_samples_per_second": 11.622, "eval_steps_per_second": 1.479, "step": 907 }, { "epoch": 34.0, "eval_gen_len": 5.1909, "eval_loss": 1.7811836004257202, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4798, "eval_samples_per_second": 11.604, "eval_steps_per_second": 1.477, "step": 935 }, { "epoch": 34.98, "eval_gen_len": 5.3182, "eval_loss": 1.7490955591201782, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4662, "eval_samples_per_second": 11.62, "eval_steps_per_second": 1.479, "step": 962 }, { "epoch": 36.0, "eval_gen_len": 5.3091, "eval_loss": 1.721943974494934, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.4582, "eval_samples_per_second": 11.63, "eval_steps_per_second": 1.48, "step": 990 }, { "epoch": 36.36, "grad_norm": 3.1814663410186768, "learning_rate": 1.6720987654320987e-05, "loss": 3.9957, "step": 1000 }, { "epoch": 36.98, "eval_gen_len": 5.3273, "eval_loss": 1.69921875, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5006, "eval_samples_per_second": 11.578, "eval_steps_per_second": 1.474, "step": 1017 }, { "epoch": 38.0, "eval_gen_len": 5.2091, "eval_loss": 1.678276777267456, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.4859, "eval_samples_per_second": 11.596, "eval_steps_per_second": 1.476, "step": 1045 }, { "epoch": 38.98, "eval_gen_len": 5.9273, "eval_loss": 1.657922625541687, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.4757, "eval_samples_per_second": 11.609, "eval_steps_per_second": 1.477, "step": 1072 }, { "epoch": 40.0, "eval_gen_len": 6.1909, "eval_loss": 1.6388959884643555, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.4468, "eval_samples_per_second": 11.644, "eval_steps_per_second": 1.482, "step": 1100 }, { "epoch": 40.98, "eval_gen_len": 5.6818, "eval_loss": 1.622721552848816, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4505, "eval_samples_per_second": 11.64, "eval_steps_per_second": 1.481, "step": 1127 }, { "epoch": 42.0, "eval_gen_len": 5.0818, "eval_loss": 1.6065285205841064, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4897, "eval_samples_per_second": 11.592, "eval_steps_per_second": 1.475, "step": 1155 }, { "epoch": 42.98, "eval_gen_len": 5.5091, "eval_loss": 1.5892502069473267, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.4662, "eval_samples_per_second": 11.62, "eval_steps_per_second": 1.479, "step": 1182 }, { "epoch": 44.0, "eval_gen_len": 6.0636, "eval_loss": 1.568816900253296, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4716, "eval_samples_per_second": 11.614, "eval_steps_per_second": 1.478, "step": 1210 }, { "epoch": 44.98, "eval_gen_len": 6.6273, "eval_loss": 1.5522329807281494, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4809, "eval_samples_per_second": 11.602, "eval_steps_per_second": 1.477, "step": 1237 }, { "epoch": 46.0, "eval_gen_len": 6.5364, "eval_loss": 1.539686918258667, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4493, "eval_samples_per_second": 11.641, "eval_steps_per_second": 1.482, "step": 1265 }, { "epoch": 46.98, "eval_gen_len": 6.3909, "eval_loss": 1.527640700340271, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.441, "eval_samples_per_second": 11.651, "eval_steps_per_second": 1.483, "step": 1292 }, { "epoch": 48.0, "eval_gen_len": 7.2818, "eval_loss": 1.5062768459320068, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4248, "eval_samples_per_second": 11.671, "eval_steps_per_second": 1.485, "step": 1320 }, { "epoch": 48.98, "eval_gen_len": 7.9273, "eval_loss": 1.4878034591674805, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.5189, "eval_samples_per_second": 11.556, "eval_steps_per_second": 1.471, "step": 1347 }, { "epoch": 50.0, "eval_gen_len": 7.5636, "eval_loss": 1.4774630069732666, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.4679, "eval_samples_per_second": 11.618, "eval_steps_per_second": 1.479, "step": 1375 }, { "epoch": 50.98, "eval_gen_len": 8.2273, "eval_loss": 1.4622873067855835, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.4582, "eval_samples_per_second": 11.63, "eval_steps_per_second": 1.48, "step": 1402 }, { "epoch": 52.0, "eval_gen_len": 8.5636, "eval_loss": 1.4518091678619385, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.4604, "eval_samples_per_second": 11.627, "eval_steps_per_second": 1.48, "step": 1430 }, { "epoch": 52.98, "eval_gen_len": 8.2545, "eval_loss": 1.4443649053573608, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.4485, "eval_samples_per_second": 11.642, "eval_steps_per_second": 1.482, "step": 1457 }, { "epoch": 54.0, "eval_gen_len": 7.9545, "eval_loss": 1.4318201541900635, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.5193, "eval_samples_per_second": 11.555, "eval_steps_per_second": 1.471, "step": 1485 }, { "epoch": 54.55, "grad_norm": 4.463714599609375, "learning_rate": 1.5078189300411523e-05, "loss": 1.9182, "step": 1500 }, { "epoch": 54.98, "eval_gen_len": 8.8273, "eval_loss": 1.4121521711349487, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0009, "eval_rougeLsum": 0.0009, "eval_runtime": 9.4998, "eval_samples_per_second": 11.579, "eval_steps_per_second": 1.474, "step": 1512 }, { "epoch": 56.0, "eval_gen_len": 7.8455, "eval_loss": 1.400160789489746, "eval_rouge1": 0.0012, "eval_rouge2": 0.0, "eval_rougeL": 0.0013, "eval_rougeLsum": 0.0012, "eval_runtime": 9.4886, "eval_samples_per_second": 11.593, "eval_steps_per_second": 1.475, "step": 1540 }, { "epoch": 56.98, "eval_gen_len": 7.7909, "eval_loss": 1.3821996450424194, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.4712, "eval_samples_per_second": 11.614, "eval_steps_per_second": 1.478, "step": 1567 }, { "epoch": 58.0, "eval_gen_len": 8.5545, "eval_loss": 1.370047688484192, "eval_rouge1": 0.001, "eval_rouge2": 0.0, "eval_rougeL": 0.001, "eval_rougeLsum": 0.001, "eval_runtime": 9.4659, "eval_samples_per_second": 11.621, "eval_steps_per_second": 1.479, "step": 1595 }, { "epoch": 58.98, "eval_gen_len": 8.4636, "eval_loss": 1.3583662509918213, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.472, "eval_samples_per_second": 11.613, "eval_steps_per_second": 1.478, "step": 1622 }, { "epoch": 60.0, "eval_gen_len": 8.0909, "eval_loss": 1.3374855518341064, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.4676, "eval_samples_per_second": 11.619, "eval_steps_per_second": 1.479, "step": 1650 }, { "epoch": 60.98, "eval_gen_len": 8.7, "eval_loss": 1.3220137357711792, "eval_rouge1": 0.0015, "eval_rouge2": 0.0, "eval_rougeL": 0.0016, "eval_rougeLsum": 0.0016, "eval_runtime": 9.504, "eval_samples_per_second": 11.574, "eval_steps_per_second": 1.473, "step": 1677 }, { "epoch": 62.0, "eval_gen_len": 8.5818, "eval_loss": 1.3142321109771729, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.4996, "eval_samples_per_second": 11.579, "eval_steps_per_second": 1.474, "step": 1705 }, { "epoch": 62.98, "eval_gen_len": 8.7727, "eval_loss": 1.3096009492874146, "eval_rouge1": 0.0016, "eval_rouge2": 0.0, "eval_rougeL": 0.0017, "eval_rougeLsum": 0.0017, "eval_runtime": 9.463, "eval_samples_per_second": 11.624, "eval_steps_per_second": 1.479, "step": 1732 }, { "epoch": 64.0, "eval_gen_len": 9.0455, "eval_loss": 1.2836058139801025, "eval_rouge1": 0.0013, "eval_rouge2": 0.0003, "eval_rougeL": 0.0012, "eval_rougeLsum": 0.0013, "eval_runtime": 9.4725, "eval_samples_per_second": 11.613, "eval_steps_per_second": 1.478, "step": 1760 }, { "epoch": 64.98, "eval_gen_len": 9.0, "eval_loss": 1.2642889022827148, "eval_rouge1": 0.002, "eval_rouge2": 0.0003, "eval_rougeL": 0.002, "eval_rougeLsum": 0.002, "eval_runtime": 9.6032, "eval_samples_per_second": 11.454, "eval_steps_per_second": 1.458, "step": 1787 }, { "epoch": 66.0, "eval_gen_len": 8.8182, "eval_loss": 1.2471901178359985, "eval_rouge1": 0.0052, "eval_rouge2": 0.001, "eval_rougeL": 0.0052, "eval_rougeLsum": 0.0052, "eval_runtime": 9.4575, "eval_samples_per_second": 11.631, "eval_steps_per_second": 1.48, "step": 1815 }, { "epoch": 66.98, "eval_gen_len": 9.6636, "eval_loss": 1.226246953010559, "eval_rouge1": 0.0086, "eval_rouge2": 0.0013, "eval_rougeL": 0.0088, "eval_rougeLsum": 0.0088, "eval_runtime": 9.4953, "eval_samples_per_second": 11.585, "eval_steps_per_second": 1.474, "step": 1842 }, { "epoch": 68.0, "eval_gen_len": 9.5273, "eval_loss": 1.2092421054840088, "eval_rouge1": 0.0107, "eval_rouge2": 0.0019, "eval_rougeL": 0.009, "eval_rougeLsum": 0.0088, "eval_runtime": 9.5273, "eval_samples_per_second": 11.546, "eval_steps_per_second": 1.469, "step": 1870 }, { "epoch": 68.98, "eval_gen_len": 9.8091, "eval_loss": 1.1953155994415283, "eval_rouge1": 0.0123, "eval_rouge2": 0.0032, "eval_rougeL": 0.012, "eval_rougeLsum": 0.0118, "eval_runtime": 9.4864, "eval_samples_per_second": 11.596, "eval_steps_per_second": 1.476, "step": 1897 }, { "epoch": 70.0, "eval_gen_len": 9.3636, "eval_loss": 1.1815446615219116, "eval_rouge1": 0.0104, "eval_rouge2": 0.0033, "eval_rougeL": 0.0101, "eval_rougeLsum": 0.0098, "eval_runtime": 9.4807, "eval_samples_per_second": 11.603, "eval_steps_per_second": 1.477, "step": 1925 }, { "epoch": 70.98, "eval_gen_len": 9.1091, "eval_loss": 1.161879301071167, "eval_rouge1": 0.0064, "eval_rouge2": 0.0008, "eval_rougeL": 0.0056, "eval_rougeLsum": 0.0055, "eval_runtime": 9.4717, "eval_samples_per_second": 11.613, "eval_steps_per_second": 1.478, "step": 1952 }, { "epoch": 72.0, "eval_gen_len": 9.3273, "eval_loss": 1.139123797416687, "eval_rouge1": 0.0105, "eval_rouge2": 0.002, "eval_rougeL": 0.0099, "eval_rougeLsum": 0.0098, "eval_runtime": 9.4501, "eval_samples_per_second": 11.64, "eval_steps_per_second": 1.481, "step": 1980 }, { "epoch": 72.73, "grad_norm": 3.556974172592163, "learning_rate": 1.34320987654321e-05, "loss": 1.6026, "step": 2000 }, { "epoch": 72.98, "eval_gen_len": 9.7364, "eval_loss": 1.1243102550506592, "eval_rouge1": 0.0108, "eval_rouge2": 0.0019, "eval_rougeL": 0.0104, "eval_rougeLsum": 0.0101, "eval_runtime": 9.4726, "eval_samples_per_second": 11.612, "eval_steps_per_second": 1.478, "step": 2007 }, { "epoch": 74.0, "eval_gen_len": 10.6182, "eval_loss": 1.1061749458312988, "eval_rouge1": 0.0204, "eval_rouge2": 0.0031, "eval_rougeL": 0.0185, "eval_rougeLsum": 0.0183, "eval_runtime": 9.4719, "eval_samples_per_second": 11.613, "eval_steps_per_second": 1.478, "step": 2035 }, { "epoch": 74.98, "eval_gen_len": 10.1545, "eval_loss": 1.0930193662643433, "eval_rouge1": 0.0159, "eval_rouge2": 0.0017, "eval_rougeL": 0.0145, "eval_rougeLsum": 0.0143, "eval_runtime": 9.4961, "eval_samples_per_second": 11.584, "eval_steps_per_second": 1.474, "step": 2062 }, { "epoch": 76.0, "eval_gen_len": 10.7364, "eval_loss": 1.077184796333313, "eval_rouge1": 0.0151, "eval_rouge2": 0.0023, "eval_rougeL": 0.0133, "eval_rougeLsum": 0.0133, "eval_runtime": 9.5109, "eval_samples_per_second": 11.566, "eval_steps_per_second": 1.472, "step": 2090 }, { "epoch": 76.98, "eval_gen_len": 10.7, "eval_loss": 1.0578068494796753, "eval_rouge1": 0.0195, "eval_rouge2": 0.0044, "eval_rougeL": 0.0178, "eval_rougeLsum": 0.0178, "eval_runtime": 9.4786, "eval_samples_per_second": 11.605, "eval_steps_per_second": 1.477, "step": 2117 }, { "epoch": 78.0, "eval_gen_len": 11.0636, "eval_loss": 1.0393445491790771, "eval_rouge1": 0.0237, "eval_rouge2": 0.0048, "eval_rougeL": 0.0214, "eval_rougeLsum": 0.0211, "eval_runtime": 9.4415, "eval_samples_per_second": 11.651, "eval_steps_per_second": 1.483, "step": 2145 }, { "epoch": 78.98, "eval_gen_len": 10.1455, "eval_loss": 1.0262919664382935, "eval_rouge1": 0.0121, "eval_rouge2": 0.0014, "eval_rougeL": 0.0113, "eval_rougeLsum": 0.0112, "eval_runtime": 9.4285, "eval_samples_per_second": 11.667, "eval_steps_per_second": 1.485, "step": 2172 }, { "epoch": 80.0, "eval_gen_len": 11.3818, "eval_loss": 1.0064616203308105, "eval_rouge1": 0.0273, "eval_rouge2": 0.0048, "eval_rougeL": 0.0238, "eval_rougeLsum": 0.0235, "eval_runtime": 9.448, "eval_samples_per_second": 11.643, "eval_steps_per_second": 1.482, "step": 2200 }, { "epoch": 80.98, "eval_gen_len": 10.9545, "eval_loss": 0.98997563123703, "eval_rouge1": 0.0228, "eval_rouge2": 0.0042, "eval_rougeL": 0.0197, "eval_rougeLsum": 0.0196, "eval_runtime": 9.4798, "eval_samples_per_second": 11.604, "eval_steps_per_second": 1.477, "step": 2227 }, { "epoch": 82.0, "eval_gen_len": 10.9909, "eval_loss": 0.9675103425979614, "eval_rouge1": 0.024, "eval_rouge2": 0.0046, "eval_rougeL": 0.0204, "eval_rougeLsum": 0.0202, "eval_runtime": 9.4448, "eval_samples_per_second": 11.647, "eval_steps_per_second": 1.482, "step": 2255 }, { "epoch": 82.98, "eval_gen_len": 10.7182, "eval_loss": 0.9506540298461914, "eval_rouge1": 0.0244, "eval_rouge2": 0.0051, "eval_rougeL": 0.0211, "eval_rougeLsum": 0.0211, "eval_runtime": 9.5012, "eval_samples_per_second": 11.578, "eval_steps_per_second": 1.474, "step": 2282 }, { "epoch": 84.0, "eval_gen_len": 10.8636, "eval_loss": 0.9340613484382629, "eval_rouge1": 0.0249, "eval_rouge2": 0.0058, "eval_rougeL": 0.022, "eval_rougeLsum": 0.0223, "eval_runtime": 9.447, "eval_samples_per_second": 11.644, "eval_steps_per_second": 1.482, "step": 2310 }, { "epoch": 84.98, "eval_gen_len": 10.0909, "eval_loss": 0.9161014556884766, "eval_rouge1": 0.0243, "eval_rouge2": 0.0077, "eval_rougeL": 0.0224, "eval_rougeLsum": 0.0226, "eval_runtime": 9.4492, "eval_samples_per_second": 11.641, "eval_steps_per_second": 1.482, "step": 2337 }, { "epoch": 86.0, "eval_gen_len": 9.7182, "eval_loss": 0.8942736983299255, "eval_rouge1": 0.0176, "eval_rouge2": 0.0035, "eval_rougeL": 0.0152, "eval_rougeLsum": 0.0153, "eval_runtime": 9.4727, "eval_samples_per_second": 11.612, "eval_steps_per_second": 1.478, "step": 2365 }, { "epoch": 86.98, "eval_gen_len": 10.0, "eval_loss": 0.8758471608161926, "eval_rouge1": 0.0239, "eval_rouge2": 0.0093, "eval_rougeL": 0.0214, "eval_rougeLsum": 0.0215, "eval_runtime": 9.4864, "eval_samples_per_second": 11.596, "eval_steps_per_second": 1.476, "step": 2392 }, { "epoch": 88.0, "eval_gen_len": 10.2273, "eval_loss": 0.854742169380188, "eval_rouge1": 0.0254, "eval_rouge2": 0.0116, "eval_rougeL": 0.0237, "eval_rougeLsum": 0.0238, "eval_runtime": 9.5043, "eval_samples_per_second": 11.574, "eval_steps_per_second": 1.473, "step": 2420 }, { "epoch": 88.98, "eval_gen_len": 10.2545, "eval_loss": 0.8352662324905396, "eval_rouge1": 0.0196, "eval_rouge2": 0.007, "eval_rougeL": 0.0183, "eval_rougeLsum": 0.0182, "eval_runtime": 9.455, "eval_samples_per_second": 11.634, "eval_steps_per_second": 1.481, "step": 2447 }, { "epoch": 90.0, "eval_gen_len": 9.2364, "eval_loss": 0.8150736093521118, "eval_rouge1": 0.0104, "eval_rouge2": 0.0032, "eval_rougeL": 0.0095, "eval_rougeLsum": 0.0098, "eval_runtime": 9.4416, "eval_samples_per_second": 11.651, "eval_steps_per_second": 1.483, "step": 2475 }, { "epoch": 90.91, "grad_norm": 1.338108777999878, "learning_rate": 1.1786008230452676e-05, "loss": 1.2934, "step": 2500 }, { "epoch": 90.98, "eval_gen_len": 9.3455, "eval_loss": 0.7920636534690857, "eval_rouge1": 0.01, "eval_rouge2": 0.0036, "eval_rougeL": 0.0095, "eval_rougeLsum": 0.0096, "eval_runtime": 9.4783, "eval_samples_per_second": 11.605, "eval_steps_per_second": 1.477, "step": 2502 }, { "epoch": 92.0, "eval_gen_len": 9.2545, "eval_loss": 0.7697137594223022, "eval_rouge1": 0.012, "eval_rouge2": 0.0051, "eval_rougeL": 0.0111, "eval_rougeLsum": 0.011, "eval_runtime": 9.5826, "eval_samples_per_second": 11.479, "eval_steps_per_second": 1.461, "step": 2530 }, { "epoch": 92.98, "eval_gen_len": 8.8455, "eval_loss": 0.7492441534996033, "eval_rouge1": 0.0106, "eval_rouge2": 0.0048, "eval_rougeL": 0.0098, "eval_rougeLsum": 0.0098, "eval_runtime": 9.4826, "eval_samples_per_second": 11.6, "eval_steps_per_second": 1.476, "step": 2557 }, { "epoch": 94.0, "eval_gen_len": 9.4, "eval_loss": 0.7301111221313477, "eval_rouge1": 0.0112, "eval_rouge2": 0.0053, "eval_rougeL": 0.0095, "eval_rougeLsum": 0.0095, "eval_runtime": 9.4682, "eval_samples_per_second": 11.618, "eval_steps_per_second": 1.479, "step": 2585 }, { "epoch": 94.98, "eval_gen_len": 8.9, "eval_loss": 0.7125746607780457, "eval_rouge1": 0.0069, "eval_rouge2": 0.0026, "eval_rougeL": 0.0063, "eval_rougeLsum": 0.0064, "eval_runtime": 9.4699, "eval_samples_per_second": 11.616, "eval_steps_per_second": 1.478, "step": 2612 }, { "epoch": 96.0, "eval_gen_len": 9.2, "eval_loss": 0.6931119561195374, "eval_rouge1": 0.0107, "eval_rouge2": 0.0053, "eval_rougeL": 0.0088, "eval_rougeLsum": 0.0089, "eval_runtime": 9.4599, "eval_samples_per_second": 11.628, "eval_steps_per_second": 1.48, "step": 2640 }, { "epoch": 96.98, "eval_gen_len": 8.8909, "eval_loss": 0.6750566363334656, "eval_rouge1": 0.0063, "eval_rouge2": 0.0029, "eval_rougeL": 0.0049, "eval_rougeLsum": 0.0048, "eval_runtime": 9.4656, "eval_samples_per_second": 11.621, "eval_steps_per_second": 1.479, "step": 2667 }, { "epoch": 98.0, "eval_gen_len": 9.3182, "eval_loss": 0.6581041216850281, "eval_rouge1": 0.0091, "eval_rouge2": 0.005, "eval_rougeL": 0.0069, "eval_rougeLsum": 0.0071, "eval_runtime": 9.4802, "eval_samples_per_second": 11.603, "eval_steps_per_second": 1.477, "step": 2695 }, { "epoch": 98.98, "eval_gen_len": 9.1091, "eval_loss": 0.6413628458976746, "eval_rouge1": 0.0038, "eval_rouge2": 0.0027, "eval_rougeL": 0.0026, "eval_rougeLsum": 0.0034, "eval_runtime": 9.4864, "eval_samples_per_second": 11.595, "eval_steps_per_second": 1.476, "step": 2722 }, { "epoch": 100.0, "eval_gen_len": 9.5273, "eval_loss": 0.6237995028495789, "eval_rouge1": 0.0125, "eval_rouge2": 0.0088, "eval_rougeL": 0.0109, "eval_rougeLsum": 0.0109, "eval_runtime": 9.479, "eval_samples_per_second": 11.605, "eval_steps_per_second": 1.477, "step": 2750 }, { "epoch": 100.98, "eval_gen_len": 9.3727, "eval_loss": 0.6053850054740906, "eval_rouge1": 0.0053, "eval_rouge2": 0.0027, "eval_rougeL": 0.0042, "eval_rougeLsum": 0.0045, "eval_runtime": 9.4604, "eval_samples_per_second": 11.627, "eval_steps_per_second": 1.48, "step": 2777 }, { "epoch": 102.0, "eval_gen_len": 9.7091, "eval_loss": 0.5907317996025085, "eval_rouge1": 0.0115, "eval_rouge2": 0.0096, "eval_rougeL": 0.0108, "eval_rougeLsum": 0.0109, "eval_runtime": 9.4533, "eval_samples_per_second": 11.636, "eval_steps_per_second": 1.481, "step": 2805 }, { "epoch": 102.98, "eval_gen_len": 9.5364, "eval_loss": 0.5752558708190918, "eval_rouge1": 0.0055, "eval_rouge2": 0.0045, "eval_rougeL": 0.005, "eval_rougeLsum": 0.0053, "eval_runtime": 9.4454, "eval_samples_per_second": 11.646, "eval_steps_per_second": 1.482, "step": 2832 }, { "epoch": 104.0, "eval_gen_len": 9.7273, "eval_loss": 0.562364399433136, "eval_rouge1": 0.01, "eval_rouge2": 0.0057, "eval_rougeL": 0.0091, "eval_rougeLsum": 0.0094, "eval_runtime": 9.4478, "eval_samples_per_second": 11.643, "eval_steps_per_second": 1.482, "step": 2860 }, { "epoch": 104.98, "eval_gen_len": 9.6909, "eval_loss": 0.5496523380279541, "eval_rouge1": 0.0078, "eval_rouge2": 0.0038, "eval_rougeL": 0.0066, "eval_rougeLsum": 0.0069, "eval_runtime": 9.4963, "eval_samples_per_second": 11.583, "eval_steps_per_second": 1.474, "step": 2887 }, { "epoch": 106.0, "eval_gen_len": 9.6, "eval_loss": 0.5380507111549377, "eval_rouge1": 0.0077, "eval_rouge2": 0.0041, "eval_rougeL": 0.0068, "eval_rougeLsum": 0.0071, "eval_runtime": 9.4581, "eval_samples_per_second": 11.63, "eval_steps_per_second": 1.48, "step": 2915 }, { "epoch": 106.98, "eval_gen_len": 9.2909, "eval_loss": 0.5269507765769958, "eval_rouge1": 0.0109, "eval_rouge2": 0.0068, "eval_rougeL": 0.0101, "eval_rougeLsum": 0.0103, "eval_runtime": 9.4431, "eval_samples_per_second": 11.649, "eval_steps_per_second": 1.483, "step": 2942 }, { "epoch": 108.0, "eval_gen_len": 8.9636, "eval_loss": 0.5166643857955933, "eval_rouge1": 0.0095, "eval_rouge2": 0.004, "eval_rougeL": 0.008, "eval_rougeLsum": 0.0079, "eval_runtime": 9.4573, "eval_samples_per_second": 11.631, "eval_steps_per_second": 1.48, "step": 2970 }, { "epoch": 108.98, "eval_gen_len": 9.3818, "eval_loss": 0.5079358816146851, "eval_rouge1": 0.0078, "eval_rouge2": 0.0035, "eval_rougeL": 0.0055, "eval_rougeLsum": 0.0059, "eval_runtime": 9.4478, "eval_samples_per_second": 11.643, "eval_steps_per_second": 1.482, "step": 2997 }, { "epoch": 109.09, "grad_norm": 2.720813512802124, "learning_rate": 1.0139917695473251e-05, "loss": 0.9194, "step": 3000 }, { "epoch": 110.0, "eval_gen_len": 9.2909, "eval_loss": 0.5007773041725159, "eval_rouge1": 0.0044, "eval_rouge2": 0.0019, "eval_rougeL": 0.0037, "eval_rougeLsum": 0.0033, "eval_runtime": 9.4749, "eval_samples_per_second": 11.61, "eval_steps_per_second": 1.478, "step": 3025 }, { "epoch": 110.98, "eval_gen_len": 9.2909, "eval_loss": 0.49183493852615356, "eval_rouge1": 0.0108, "eval_rouge2": 0.0069, "eval_rougeL": 0.0087, "eval_rougeLsum": 0.0088, "eval_runtime": 9.5004, "eval_samples_per_second": 11.578, "eval_steps_per_second": 1.474, "step": 3052 }, { "epoch": 112.0, "eval_gen_len": 9.2455, "eval_loss": 0.48342087864875793, "eval_rouge1": 0.0112, "eval_rouge2": 0.008, "eval_rougeL": 0.0092, "eval_rougeLsum": 0.009, "eval_runtime": 9.5074, "eval_samples_per_second": 11.57, "eval_steps_per_second": 1.473, "step": 3080 }, { "epoch": 112.98, "eval_gen_len": 8.9364, "eval_loss": 0.47708794474601746, "eval_rouge1": 0.0057, "eval_rouge2": 0.0031, "eval_rougeL": 0.0042, "eval_rougeLsum": 0.004, "eval_runtime": 9.5044, "eval_samples_per_second": 11.574, "eval_steps_per_second": 1.473, "step": 3107 }, { "epoch": 114.0, "eval_gen_len": 9.1364, "eval_loss": 0.47102922201156616, "eval_rouge1": 0.0122, "eval_rouge2": 0.0065, "eval_rougeL": 0.0089, "eval_rougeLsum": 0.0087, "eval_runtime": 9.4603, "eval_samples_per_second": 11.628, "eval_steps_per_second": 1.48, "step": 3135 }, { "epoch": 114.98, "eval_gen_len": 9.2182, "eval_loss": 0.4654105007648468, "eval_rouge1": 0.0102, "eval_rouge2": 0.0063, "eval_rougeL": 0.0085, "eval_rougeLsum": 0.0083, "eval_runtime": 9.4614, "eval_samples_per_second": 11.626, "eval_steps_per_second": 1.48, "step": 3162 }, { "epoch": 116.0, "eval_gen_len": 8.9455, "eval_loss": 0.4600731432437897, "eval_rouge1": 0.0119, "eval_rouge2": 0.0092, "eval_rougeL": 0.0104, "eval_rougeLsum": 0.0102, "eval_runtime": 9.461, "eval_samples_per_second": 11.627, "eval_steps_per_second": 1.48, "step": 3190 }, { "epoch": 116.98, "eval_gen_len": 8.4182, "eval_loss": 0.4526377320289612, "eval_rouge1": 0.0107, "eval_rouge2": 0.0082, "eval_rougeL": 0.0099, "eval_rougeLsum": 0.0097, "eval_runtime": 9.4731, "eval_samples_per_second": 11.612, "eval_steps_per_second": 1.478, "step": 3217 }, { "epoch": 118.0, "eval_gen_len": 8.4273, "eval_loss": 0.4467811584472656, "eval_rouge1": 0.0052, "eval_rouge2": 0.0044, "eval_rougeL": 0.0045, "eval_rougeLsum": 0.0045, "eval_runtime": 9.4604, "eval_samples_per_second": 11.627, "eval_steps_per_second": 1.48, "step": 3245 }, { "epoch": 118.98, "eval_gen_len": 8.3, "eval_loss": 0.4426099956035614, "eval_rouge1": 0.0054, "eval_rouge2": 0.0041, "eval_rougeL": 0.005, "eval_rougeLsum": 0.0052, "eval_runtime": 9.4891, "eval_samples_per_second": 11.592, "eval_steps_per_second": 1.475, "step": 3272 }, { "epoch": 120.0, "eval_gen_len": 8.2727, "eval_loss": 0.43666210770606995, "eval_rouge1": 0.0107, "eval_rouge2": 0.0079, "eval_rougeL": 0.0102, "eval_rougeLsum": 0.0101, "eval_runtime": 9.5096, "eval_samples_per_second": 11.567, "eval_steps_per_second": 1.472, "step": 3300 }, { "epoch": 120.98, "eval_gen_len": 8.7182, "eval_loss": 0.4338292181491852, "eval_rouge1": 0.0142, "eval_rouge2": 0.0102, "eval_rougeL": 0.0134, "eval_rougeLsum": 0.0131, "eval_runtime": 9.4732, "eval_samples_per_second": 11.612, "eval_steps_per_second": 1.478, "step": 3327 }, { "epoch": 122.0, "eval_gen_len": 7.5727, "eval_loss": 0.4293038547039032, "eval_rouge1": 0.0045, "eval_rouge2": 0.0035, "eval_rougeL": 0.0039, "eval_rougeLsum": 0.0039, "eval_runtime": 9.458, "eval_samples_per_second": 11.63, "eval_steps_per_second": 1.48, "step": 3355 }, { "epoch": 122.98, "eval_gen_len": 7.8818, "eval_loss": 0.4247772991657257, "eval_rouge1": 0.0082, "eval_rouge2": 0.0056, "eval_rougeL": 0.0078, "eval_rougeLsum": 0.0076, "eval_runtime": 9.4976, "eval_samples_per_second": 11.582, "eval_steps_per_second": 1.474, "step": 3382 }, { "epoch": 124.0, "eval_gen_len": 7.4273, "eval_loss": 0.4226304888725281, "eval_rouge1": 0.0047, "eval_rouge2": 0.0039, "eval_rougeL": 0.0047, "eval_rougeLsum": 0.0047, "eval_runtime": 9.4775, "eval_samples_per_second": 11.606, "eval_steps_per_second": 1.477, "step": 3410 }, { "epoch": 124.98, "eval_gen_len": 7.7091, "eval_loss": 0.4187226891517639, "eval_rouge1": 0.0096, "eval_rouge2": 0.0065, "eval_rougeL": 0.0097, "eval_rougeLsum": 0.0095, "eval_runtime": 9.4596, "eval_samples_per_second": 11.628, "eval_steps_per_second": 1.48, "step": 3437 }, { "epoch": 126.0, "eval_gen_len": 7.1364, "eval_loss": 0.4152156412601471, "eval_rouge1": 0.0026, "eval_rouge2": 0.0024, "eval_rougeL": 0.0026, "eval_rougeLsum": 0.0026, "eval_runtime": 9.4787, "eval_samples_per_second": 11.605, "eval_steps_per_second": 1.477, "step": 3465 }, { "epoch": 126.98, "eval_gen_len": 6.8909, "eval_loss": 0.4114760458469391, "eval_rouge1": 0.0026, "eval_rouge2": 0.0024, "eval_rougeL": 0.0026, "eval_rougeLsum": 0.0026, "eval_runtime": 9.4927, "eval_samples_per_second": 11.588, "eval_steps_per_second": 1.475, "step": 3492 }, { "epoch": 127.27, "grad_norm": 5.880601406097412, "learning_rate": 8.493827160493828e-06, "loss": 0.6369, "step": 3500 }, { "epoch": 128.0, "eval_gen_len": 6.7182, "eval_loss": 0.4087616503238678, "eval_rouge1": 0.0051, "eval_rouge2": 0.0048, "eval_rougeL": 0.0051, "eval_rougeLsum": 0.0051, "eval_runtime": 9.5116, "eval_samples_per_second": 11.565, "eval_steps_per_second": 1.472, "step": 3520 }, { "epoch": 128.98, "eval_gen_len": 7.3091, "eval_loss": 0.4050390422344208, "eval_rouge1": 0.0113, "eval_rouge2": 0.0097, "eval_rougeL": 0.0115, "eval_rougeLsum": 0.0115, "eval_runtime": 9.5251, "eval_samples_per_second": 11.548, "eval_steps_per_second": 1.47, "step": 3547 }, { "epoch": 130.0, "eval_gen_len": 7.2727, "eval_loss": 0.40342459082603455, "eval_rouge1": 0.0097, "eval_rouge2": 0.0086, "eval_rougeL": 0.0098, "eval_rougeLsum": 0.0099, "eval_runtime": 9.5061, "eval_samples_per_second": 11.572, "eval_steps_per_second": 1.473, "step": 3575 }, { "epoch": 130.98, "eval_gen_len": 6.9455, "eval_loss": 0.39917439222335815, "eval_rouge1": 0.0096, "eval_rouge2": 0.0081, "eval_rougeL": 0.0097, "eval_rougeLsum": 0.0097, "eval_runtime": 9.5045, "eval_samples_per_second": 11.573, "eval_steps_per_second": 1.473, "step": 3602 }, { "epoch": 132.0, "eval_gen_len": 6.6, "eval_loss": 0.3954027593135834, "eval_rouge1": 0.0053, "eval_rouge2": 0.0056, "eval_rougeL": 0.0053, "eval_rougeLsum": 0.0061, "eval_runtime": 9.4882, "eval_samples_per_second": 11.593, "eval_steps_per_second": 1.476, "step": 3630 }, { "epoch": 132.98, "eval_gen_len": 6.6727, "eval_loss": 0.3916667103767395, "eval_rouge1": 0.0061, "eval_rouge2": 0.0056, "eval_rougeL": 0.006, "eval_rougeLsum": 0.0061, "eval_runtime": 9.4921, "eval_samples_per_second": 11.589, "eval_steps_per_second": 1.475, "step": 3657 }, { "epoch": 134.0, "eval_gen_len": 6.9455, "eval_loss": 0.3922131359577179, "eval_rouge1": 0.0084, "eval_rouge2": 0.0072, "eval_rougeL": 0.0082, "eval_rougeLsum": 0.0086, "eval_runtime": 9.4995, "eval_samples_per_second": 11.58, "eval_steps_per_second": 1.474, "step": 3685 }, { "epoch": 134.98, "eval_gen_len": 6.3727, "eval_loss": 0.38674217462539673, "eval_rouge1": 0.0052, "eval_rouge2": 0.003, "eval_rougeL": 0.0046, "eval_rougeLsum": 0.0046, "eval_runtime": 9.4561, "eval_samples_per_second": 11.633, "eval_steps_per_second": 1.481, "step": 3712 }, { "epoch": 136.0, "eval_gen_len": 6.6273, "eval_loss": 0.38475027680397034, "eval_rouge1": 0.009, "eval_rouge2": 0.0061, "eval_rougeL": 0.0083, "eval_rougeLsum": 0.0086, "eval_runtime": 9.4487, "eval_samples_per_second": 11.642, "eval_steps_per_second": 1.482, "step": 3740 }, { "epoch": 136.98, "eval_gen_len": 6.4455, "eval_loss": 0.37981557846069336, "eval_rouge1": 0.0123, "eval_rouge2": 0.0095, "eval_rougeL": 0.0119, "eval_rougeLsum": 0.0121, "eval_runtime": 9.4838, "eval_samples_per_second": 11.599, "eval_steps_per_second": 1.476, "step": 3767 }, { "epoch": 138.0, "eval_gen_len": 6.6909, "eval_loss": 0.3788022994995117, "eval_rouge1": 0.0138, "eval_rouge2": 0.01, "eval_rougeL": 0.0132, "eval_rougeLsum": 0.0133, "eval_runtime": 9.4736, "eval_samples_per_second": 11.611, "eval_steps_per_second": 1.478, "step": 3795 }, { "epoch": 138.98, "eval_gen_len": 6.3909, "eval_loss": 0.3755718171596527, "eval_rouge1": 0.0119, "eval_rouge2": 0.0085, "eval_rougeL": 0.0116, "eval_rougeLsum": 0.0116, "eval_runtime": 9.4922, "eval_samples_per_second": 11.588, "eval_steps_per_second": 1.475, "step": 3822 }, { "epoch": 140.0, "eval_gen_len": 6.6545, "eval_loss": 0.3745150864124298, "eval_rouge1": 0.0135, "eval_rouge2": 0.0095, "eval_rougeL": 0.013, "eval_rougeLsum": 0.013, "eval_runtime": 9.5082, "eval_samples_per_second": 11.569, "eval_steps_per_second": 1.472, "step": 3850 }, { "epoch": 140.98, "eval_gen_len": 6.9636, "eval_loss": 0.37223342061042786, "eval_rouge1": 0.0175, "eval_rouge2": 0.0123, "eval_rougeL": 0.0171, "eval_rougeLsum": 0.0168, "eval_runtime": 9.506, "eval_samples_per_second": 11.572, "eval_steps_per_second": 1.473, "step": 3877 }, { "epoch": 142.0, "eval_gen_len": 7.0727, "eval_loss": 0.36922305822372437, "eval_rouge1": 0.0188, "eval_rouge2": 0.0127, "eval_rougeL": 0.0183, "eval_rougeLsum": 0.018, "eval_runtime": 9.4985, "eval_samples_per_second": 11.581, "eval_steps_per_second": 1.474, "step": 3905 }, { "epoch": 142.98, "eval_gen_len": 7.2727, "eval_loss": 0.3674834668636322, "eval_rouge1": 0.0201, "eval_rouge2": 0.0136, "eval_rougeL": 0.0197, "eval_rougeLsum": 0.0194, "eval_runtime": 9.4928, "eval_samples_per_second": 11.588, "eval_steps_per_second": 1.475, "step": 3932 }, { "epoch": 144.0, "eval_gen_len": 7.1818, "eval_loss": 0.36526089906692505, "eval_rouge1": 0.0215, "eval_rouge2": 0.0139, "eval_rougeL": 0.0211, "eval_rougeLsum": 0.0208, "eval_runtime": 9.4987, "eval_samples_per_second": 11.581, "eval_steps_per_second": 1.474, "step": 3960 }, { "epoch": 144.98, "eval_gen_len": 7.2, "eval_loss": 0.36316850781440735, "eval_rouge1": 0.0209, "eval_rouge2": 0.0128, "eval_rougeL": 0.0201, "eval_rougeLsum": 0.0202, "eval_runtime": 9.4792, "eval_samples_per_second": 11.604, "eval_steps_per_second": 1.477, "step": 3987 }, { "epoch": 145.45, "grad_norm": 0.6945245265960693, "learning_rate": 6.847736625514404e-06, "loss": 0.5099, "step": 4000 }, { "epoch": 146.0, "eval_gen_len": 7.1364, "eval_loss": 0.3603822886943817, "eval_rouge1": 0.022, "eval_rouge2": 0.0145, "eval_rougeL": 0.0213, "eval_rougeLsum": 0.0212, "eval_runtime": 9.5107, "eval_samples_per_second": 11.566, "eval_steps_per_second": 1.472, "step": 4015 }, { "epoch": 146.98, "eval_gen_len": 7.1182, "eval_loss": 0.35853010416030884, "eval_rouge1": 0.022, "eval_rouge2": 0.0145, "eval_rougeL": 0.0213, "eval_rougeLsum": 0.0212, "eval_runtime": 9.5074, "eval_samples_per_second": 11.57, "eval_steps_per_second": 1.473, "step": 4042 }, { "epoch": 148.0, "eval_gen_len": 7.7364, "eval_loss": 0.35745835304260254, "eval_rouge1": 0.0283, "eval_rouge2": 0.018, "eval_rougeL": 0.0269, "eval_rougeLsum": 0.0269, "eval_runtime": 9.482, "eval_samples_per_second": 11.601, "eval_steps_per_second": 1.476, "step": 4070 }, { "epoch": 148.98, "eval_gen_len": 8.3364, "eval_loss": 0.3559305965900421, "eval_rouge1": 0.0374, "eval_rouge2": 0.0236, "eval_rougeL": 0.0357, "eval_rougeLsum": 0.0359, "eval_runtime": 9.4877, "eval_samples_per_second": 11.594, "eval_steps_per_second": 1.476, "step": 4097 }, { "epoch": 150.0, "eval_gen_len": 8.9091, "eval_loss": 0.3528214395046234, "eval_rouge1": 0.0384, "eval_rouge2": 0.0236, "eval_rougeL": 0.0366, "eval_rougeLsum": 0.0369, "eval_runtime": 9.4747, "eval_samples_per_second": 11.61, "eval_steps_per_second": 1.478, "step": 4125 }, { "epoch": 150.98, "eval_gen_len": 9.4455, "eval_loss": 0.35084155201911926, "eval_rouge1": 0.0416, "eval_rouge2": 0.0254, "eval_rougeL": 0.0399, "eval_rougeLsum": 0.0399, "eval_runtime": 9.4685, "eval_samples_per_second": 11.618, "eval_steps_per_second": 1.479, "step": 4152 }, { "epoch": 152.0, "eval_gen_len": 9.6091, "eval_loss": 0.34896430373191833, "eval_rouge1": 0.0439, "eval_rouge2": 0.0257, "eval_rougeL": 0.0413, "eval_rougeLsum": 0.0415, "eval_runtime": 9.5218, "eval_samples_per_second": 11.552, "eval_steps_per_second": 1.47, "step": 4180 }, { "epoch": 152.98, "eval_gen_len": 9.7636, "eval_loss": 0.34783267974853516, "eval_rouge1": 0.0479, "eval_rouge2": 0.0297, "eval_rougeL": 0.045, "eval_rougeLsum": 0.0454, "eval_runtime": 9.5136, "eval_samples_per_second": 11.562, "eval_steps_per_second": 1.472, "step": 4207 }, { "epoch": 154.0, "eval_gen_len": 10.1909, "eval_loss": 0.3452661335468292, "eval_rouge1": 0.0495, "eval_rouge2": 0.0291, "eval_rougeL": 0.0464, "eval_rougeLsum": 0.0464, "eval_runtime": 9.5097, "eval_samples_per_second": 11.567, "eval_steps_per_second": 1.472, "step": 4235 }, { "epoch": 154.98, "eval_gen_len": 10.6, "eval_loss": 0.34583330154418945, "eval_rouge1": 0.0576, "eval_rouge2": 0.035, "eval_rougeL": 0.055, "eval_rougeLsum": 0.0551, "eval_runtime": 9.4851, "eval_samples_per_second": 11.597, "eval_steps_per_second": 1.476, "step": 4262 }, { "epoch": 156.0, "eval_gen_len": 10.2909, "eval_loss": 0.3417557179927826, "eval_rouge1": 0.0533, "eval_rouge2": 0.0314, "eval_rougeL": 0.0506, "eval_rougeLsum": 0.0507, "eval_runtime": 9.474, "eval_samples_per_second": 11.611, "eval_steps_per_second": 1.478, "step": 4290 }, { "epoch": 156.98, "eval_gen_len": 10.9364, "eval_loss": 0.3396497666835785, "eval_rouge1": 0.0591, "eval_rouge2": 0.0351, "eval_rougeL": 0.0561, "eval_rougeLsum": 0.0561, "eval_runtime": 9.496, "eval_samples_per_second": 11.584, "eval_steps_per_second": 1.474, "step": 4317 }, { "epoch": 158.0, "eval_gen_len": 11.0364, "eval_loss": 0.3386593759059906, "eval_rouge1": 0.0633, "eval_rouge2": 0.0387, "eval_rougeL": 0.0605, "eval_rougeLsum": 0.0601, "eval_runtime": 9.4912, "eval_samples_per_second": 11.59, "eval_steps_per_second": 1.475, "step": 4345 }, { "epoch": 158.98, "eval_gen_len": 11.4455, "eval_loss": 0.3368191123008728, "eval_rouge1": 0.0614, "eval_rouge2": 0.0371, "eval_rougeL": 0.0593, "eval_rougeLsum": 0.0583, "eval_runtime": 9.5012, "eval_samples_per_second": 11.578, "eval_steps_per_second": 1.474, "step": 4372 }, { "epoch": 160.0, "eval_gen_len": 11.4545, "eval_loss": 0.33700016140937805, "eval_rouge1": 0.0702, "eval_rouge2": 0.0444, "eval_rougeL": 0.0672, "eval_rougeLsum": 0.0671, "eval_runtime": 9.4911, "eval_samples_per_second": 11.59, "eval_steps_per_second": 1.475, "step": 4400 }, { "epoch": 160.98, "eval_gen_len": 11.4182, "eval_loss": 0.3347805440425873, "eval_rouge1": 0.0702, "eval_rouge2": 0.0444, "eval_rougeL": 0.0672, "eval_rougeLsum": 0.0671, "eval_runtime": 9.4829, "eval_samples_per_second": 11.6, "eval_steps_per_second": 1.476, "step": 4427 }, { "epoch": 162.0, "eval_gen_len": 11.2818, "eval_loss": 0.332653284072876, "eval_rouge1": 0.0691, "eval_rouge2": 0.0438, "eval_rougeL": 0.0661, "eval_rougeLsum": 0.0656, "eval_runtime": 9.5355, "eval_samples_per_second": 11.536, "eval_steps_per_second": 1.468, "step": 4455 }, { "epoch": 162.98, "eval_gen_len": 11.3545, "eval_loss": 0.3307643234729767, "eval_rouge1": 0.0648, "eval_rouge2": 0.0405, "eval_rougeL": 0.0614, "eval_rougeLsum": 0.0609, "eval_runtime": 9.4851, "eval_samples_per_second": 11.597, "eval_steps_per_second": 1.476, "step": 4482 }, { "epoch": 163.64, "grad_norm": 0.6090702414512634, "learning_rate": 5.20164609053498e-06, "loss": 0.4471, "step": 4500 }, { "epoch": 164.0, "eval_gen_len": 11.7909, "eval_loss": 0.32992881536483765, "eval_rouge1": 0.0711, "eval_rouge2": 0.0441, "eval_rougeL": 0.0677, "eval_rougeLsum": 0.0667, "eval_runtime": 9.5142, "eval_samples_per_second": 11.562, "eval_steps_per_second": 1.471, "step": 4510 }, { "epoch": 164.98, "eval_gen_len": 12.0273, "eval_loss": 0.3291892111301422, "eval_rouge1": 0.0749, "eval_rouge2": 0.0476, "eval_rougeL": 0.0709, "eval_rougeLsum": 0.0701, "eval_runtime": 9.5232, "eval_samples_per_second": 11.551, "eval_steps_per_second": 1.47, "step": 4537 }, { "epoch": 166.0, "eval_gen_len": 12.4364, "eval_loss": 0.3260752558708191, "eval_rouge1": 0.078, "eval_rouge2": 0.0484, "eval_rougeL": 0.0728, "eval_rougeLsum": 0.0724, "eval_runtime": 9.5359, "eval_samples_per_second": 11.535, "eval_steps_per_second": 1.468, "step": 4565 }, { "epoch": 166.98, "eval_gen_len": 12.6636, "eval_loss": 0.32543399930000305, "eval_rouge1": 0.0865, "eval_rouge2": 0.0555, "eval_rougeL": 0.0812, "eval_rougeLsum": 0.0806, "eval_runtime": 9.5501, "eval_samples_per_second": 11.518, "eval_steps_per_second": 1.466, "step": 4592 }, { "epoch": 168.0, "eval_gen_len": 12.8455, "eval_loss": 0.3237576484680176, "eval_rouge1": 0.081, "eval_rouge2": 0.0501, "eval_rougeL": 0.0763, "eval_rougeLsum": 0.0754, "eval_runtime": 9.5386, "eval_samples_per_second": 11.532, "eval_steps_per_second": 1.468, "step": 4620 }, { "epoch": 168.98, "eval_gen_len": 13.1273, "eval_loss": 0.32304662466049194, "eval_rouge1": 0.0899, "eval_rouge2": 0.0584, "eval_rougeL": 0.0846, "eval_rougeLsum": 0.0836, "eval_runtime": 9.4963, "eval_samples_per_second": 11.583, "eval_steps_per_second": 1.474, "step": 4647 }, { "epoch": 170.0, "eval_gen_len": 13.5364, "eval_loss": 0.3217833638191223, "eval_rouge1": 0.0936, "eval_rouge2": 0.0579, "eval_rougeL": 0.0864, "eval_rougeLsum": 0.0854, "eval_runtime": 9.5121, "eval_samples_per_second": 11.564, "eval_steps_per_second": 1.472, "step": 4675 }, { "epoch": 170.98, "eval_gen_len": 13.9455, "eval_loss": 0.3209010660648346, "eval_rouge1": 0.0976, "eval_rouge2": 0.063, "eval_rougeL": 0.0914, "eval_rougeLsum": 0.0904, "eval_runtime": 9.507, "eval_samples_per_second": 11.57, "eval_steps_per_second": 1.473, "step": 4702 }, { "epoch": 172.0, "eval_gen_len": 14.2545, "eval_loss": 0.31976738572120667, "eval_rouge1": 0.1024, "eval_rouge2": 0.0663, "eval_rougeL": 0.0959, "eval_rougeLsum": 0.0946, "eval_runtime": 9.527, "eval_samples_per_second": 11.546, "eval_steps_per_second": 1.47, "step": 4730 }, { "epoch": 172.98, "eval_gen_len": 13.7909, "eval_loss": 0.31930533051490784, "eval_rouge1": 0.0943, "eval_rouge2": 0.0596, "eval_rougeL": 0.0883, "eval_rougeLsum": 0.0873, "eval_runtime": 9.5147, "eval_samples_per_second": 11.561, "eval_steps_per_second": 1.471, "step": 4757 }, { "epoch": 174.0, "eval_gen_len": 13.9182, "eval_loss": 0.3177041709423065, "eval_rouge1": 0.0964, "eval_rouge2": 0.0605, "eval_rougeL": 0.0905, "eval_rougeLsum": 0.0896, "eval_runtime": 9.4816, "eval_samples_per_second": 11.601, "eval_steps_per_second": 1.477, "step": 4785 }, { "epoch": 174.98, "eval_gen_len": 13.4636, "eval_loss": 0.3157893121242523, "eval_rouge1": 0.0978, "eval_rouge2": 0.0628, "eval_rougeL": 0.0918, "eval_rougeLsum": 0.0905, "eval_runtime": 9.4906, "eval_samples_per_second": 11.59, "eval_steps_per_second": 1.475, "step": 4812 }, { "epoch": 176.0, "eval_gen_len": 14.3273, "eval_loss": 0.3147883713245392, "eval_rouge1": 0.1021, "eval_rouge2": 0.0643, "eval_rougeL": 0.0951, "eval_rougeLsum": 0.0943, "eval_runtime": 9.5161, "eval_samples_per_second": 11.559, "eval_steps_per_second": 1.471, "step": 4840 }, { "epoch": 176.98, "eval_gen_len": 14.3727, "eval_loss": 0.31470227241516113, "eval_rouge1": 0.1043, "eval_rouge2": 0.0661, "eval_rougeL": 0.0982, "eval_rougeLsum": 0.0972, "eval_runtime": 9.5231, "eval_samples_per_second": 11.551, "eval_steps_per_second": 1.47, "step": 4867 }, { "epoch": 178.0, "eval_gen_len": 14.4455, "eval_loss": 0.31245002150535583, "eval_rouge1": 0.1068, "eval_rouge2": 0.0691, "eval_rougeL": 0.1016, "eval_rougeLsum": 0.1001, "eval_runtime": 9.5249, "eval_samples_per_second": 11.549, "eval_steps_per_second": 1.47, "step": 4895 }, { "epoch": 178.98, "eval_gen_len": 14.7091, "eval_loss": 0.3121263384819031, "eval_rouge1": 0.1073, "eval_rouge2": 0.0682, "eval_rougeL": 0.1012, "eval_rougeLsum": 0.1, "eval_runtime": 9.531, "eval_samples_per_second": 11.541, "eval_steps_per_second": 1.469, "step": 4922 }, { "epoch": 180.0, "eval_gen_len": 14.7182, "eval_loss": 0.310048907995224, "eval_rouge1": 0.1095, "eval_rouge2": 0.0698, "eval_rougeL": 0.1038, "eval_rougeLsum": 0.1022, "eval_runtime": 9.5031, "eval_samples_per_second": 11.575, "eval_steps_per_second": 1.473, "step": 4950 }, { "epoch": 180.98, "eval_gen_len": 14.5727, "eval_loss": 0.3098377585411072, "eval_rouge1": 0.1122, "eval_rouge2": 0.0717, "eval_rougeL": 0.1054, "eval_rougeLsum": 0.1048, "eval_runtime": 9.5178, "eval_samples_per_second": 11.557, "eval_steps_per_second": 1.471, "step": 4977 }, { "epoch": 181.82, "grad_norm": 0.6373523473739624, "learning_rate": 3.555555555555556e-06, "loss": 0.4093, "step": 5000 }, { "epoch": 182.0, "eval_gen_len": 14.8182, "eval_loss": 0.3088683784008026, "eval_rouge1": 0.1148, "eval_rouge2": 0.074, "eval_rougeL": 0.1082, "eval_rougeLsum": 0.1071, "eval_runtime": 9.5147, "eval_samples_per_second": 11.561, "eval_steps_per_second": 1.471, "step": 5005 }, { "epoch": 182.98, "eval_gen_len": 14.6727, "eval_loss": 0.30828168988227844, "eval_rouge1": 0.1088, "eval_rouge2": 0.0689, "eval_rougeL": 0.1021, "eval_rougeLsum": 0.1014, "eval_runtime": 9.5092, "eval_samples_per_second": 11.568, "eval_steps_per_second": 1.472, "step": 5032 }, { "epoch": 184.0, "eval_gen_len": 15.2182, "eval_loss": 0.30716758966445923, "eval_rouge1": 0.1167, "eval_rouge2": 0.0746, "eval_rougeL": 0.1098, "eval_rougeLsum": 0.1084, "eval_runtime": 9.5556, "eval_samples_per_second": 11.512, "eval_steps_per_second": 1.465, "step": 5060 }, { "epoch": 184.98, "eval_gen_len": 15.9364, "eval_loss": 0.3059370815753937, "eval_rouge1": 0.1233, "eval_rouge2": 0.08, "eval_rougeL": 0.1166, "eval_rougeLsum": 0.1157, "eval_runtime": 9.5537, "eval_samples_per_second": 11.514, "eval_steps_per_second": 1.465, "step": 5087 }, { "epoch": 186.0, "eval_gen_len": 15.1727, "eval_loss": 0.3056795597076416, "eval_rouge1": 0.1128, "eval_rouge2": 0.0707, "eval_rougeL": 0.1055, "eval_rougeLsum": 0.1049, "eval_runtime": 9.5621, "eval_samples_per_second": 11.504, "eval_steps_per_second": 1.464, "step": 5115 }, { "epoch": 186.98, "eval_gen_len": 15.1818, "eval_loss": 0.3043256402015686, "eval_rouge1": 0.1131, "eval_rouge2": 0.0707, "eval_rougeL": 0.1057, "eval_rougeLsum": 0.105, "eval_runtime": 9.5545, "eval_samples_per_second": 11.513, "eval_steps_per_second": 1.465, "step": 5142 }, { "epoch": 188.0, "eval_gen_len": 15.1727, "eval_loss": 0.30425599217414856, "eval_rouge1": 0.1125, "eval_rouge2": 0.0703, "eval_rougeL": 0.1052, "eval_rougeLsum": 0.1046, "eval_runtime": 9.5018, "eval_samples_per_second": 11.577, "eval_steps_per_second": 1.473, "step": 5170 }, { "epoch": 188.98, "eval_gen_len": 15.1636, "eval_loss": 0.30395984649658203, "eval_rouge1": 0.1128, "eval_rouge2": 0.0705, "eval_rougeL": 0.1054, "eval_rougeLsum": 0.1049, "eval_runtime": 9.5213, "eval_samples_per_second": 11.553, "eval_steps_per_second": 1.47, "step": 5197 }, { "epoch": 190.0, "eval_gen_len": 15.2455, "eval_loss": 0.30324217677116394, "eval_rouge1": 0.1136, "eval_rouge2": 0.0705, "eval_rougeL": 0.1061, "eval_rougeLsum": 0.1056, "eval_runtime": 9.5432, "eval_samples_per_second": 11.527, "eval_steps_per_second": 1.467, "step": 5225 }, { "epoch": 190.98, "eval_gen_len": 15.6182, "eval_loss": 0.30257585644721985, "eval_rouge1": 0.1149, "eval_rouge2": 0.071, "eval_rougeL": 0.1075, "eval_rougeLsum": 0.107, "eval_runtime": 9.5398, "eval_samples_per_second": 11.531, "eval_steps_per_second": 1.468, "step": 5252 }, { "epoch": 192.0, "eval_gen_len": 15.7545, "eval_loss": 0.301755428314209, "eval_rouge1": 0.118, "eval_rouge2": 0.0744, "eval_rougeL": 0.1114, "eval_rougeLsum": 0.1105, "eval_runtime": 9.5491, "eval_samples_per_second": 11.519, "eval_steps_per_second": 1.466, "step": 5280 }, { "epoch": 192.98, "eval_gen_len": 15.7545, "eval_loss": 0.30100175738334656, "eval_rouge1": 0.1186, "eval_rouge2": 0.0756, "eval_rougeL": 0.1122, "eval_rougeLsum": 0.1116, "eval_runtime": 9.5605, "eval_samples_per_second": 11.506, "eval_steps_per_second": 1.464, "step": 5307 }, { "epoch": 194.0, "eval_gen_len": 15.6727, "eval_loss": 0.3014240562915802, "eval_rouge1": 0.1169, "eval_rouge2": 0.0738, "eval_rougeL": 0.1106, "eval_rougeLsum": 0.1094, "eval_runtime": 9.5649, "eval_samples_per_second": 11.5, "eval_steps_per_second": 1.464, "step": 5335 }, { "epoch": 194.98, "eval_gen_len": 15.6364, "eval_loss": 0.3001127541065216, "eval_rouge1": 0.1161, "eval_rouge2": 0.0734, "eval_rougeL": 0.1104, "eval_rougeLsum": 0.1092, "eval_runtime": 9.558, "eval_samples_per_second": 11.509, "eval_steps_per_second": 1.465, "step": 5362 }, { "epoch": 196.0, "eval_gen_len": 15.7909, "eval_loss": 0.29918792843818665, "eval_rouge1": 0.1192, "eval_rouge2": 0.0752, "eval_rougeL": 0.1132, "eval_rougeLsum": 0.1125, "eval_runtime": 9.5635, "eval_samples_per_second": 11.502, "eval_steps_per_second": 1.464, "step": 5390 }, { "epoch": 196.98, "eval_gen_len": 15.6364, "eval_loss": 0.2991277277469635, "eval_rouge1": 0.1205, "eval_rouge2": 0.0772, "eval_rougeL": 0.1141, "eval_rougeLsum": 0.1133, "eval_runtime": 9.524, "eval_samples_per_second": 11.55, "eval_steps_per_second": 1.47, "step": 5417 }, { "epoch": 198.0, "eval_gen_len": 15.7909, "eval_loss": 0.2986967861652374, "eval_rouge1": 0.1202, "eval_rouge2": 0.0768, "eval_rougeL": 0.1143, "eval_rougeLsum": 0.1138, "eval_runtime": 9.5106, "eval_samples_per_second": 11.566, "eval_steps_per_second": 1.472, "step": 5445 }, { "epoch": 198.98, "eval_gen_len": 15.6182, "eval_loss": 0.2981720566749573, "eval_rouge1": 0.1217, "eval_rouge2": 0.0793, "eval_rougeL": 0.1153, "eval_rougeLsum": 0.1148, "eval_runtime": 9.5676, "eval_samples_per_second": 11.497, "eval_steps_per_second": 1.463, "step": 5472 }, { "epoch": 200.0, "grad_norm": 1.2200640439987183, "learning_rate": 1.909465020576132e-06, "loss": 0.3864, "step": 5500 }, { "epoch": 200.0, "eval_gen_len": 15.7818, "eval_loss": 0.29755449295043945, "eval_rouge1": 0.1218, "eval_rouge2": 0.079, "eval_rougeL": 0.1154, "eval_rougeLsum": 0.1147, "eval_runtime": 9.547, "eval_samples_per_second": 11.522, "eval_steps_per_second": 1.466, "step": 5500 }, { "epoch": 200.98, "eval_gen_len": 15.6182, "eval_loss": 0.29704758524894714, "eval_rouge1": 0.1219, "eval_rouge2": 0.0811, "eval_rougeL": 0.1163, "eval_rougeLsum": 0.116, "eval_runtime": 9.5552, "eval_samples_per_second": 11.512, "eval_steps_per_second": 1.465, "step": 5527 }, { "epoch": 202.0, "eval_gen_len": 15.5273, "eval_loss": 0.29719075560569763, "eval_rouge1": 0.122, "eval_rouge2": 0.0811, "eval_rougeL": 0.116, "eval_rougeLsum": 0.1157, "eval_runtime": 9.5815, "eval_samples_per_second": 11.48, "eval_steps_per_second": 1.461, "step": 5555 }, { "epoch": 202.98, "eval_gen_len": 15.6909, "eval_loss": 0.29677239060401917, "eval_rouge1": 0.1209, "eval_rouge2": 0.0803, "eval_rougeL": 0.1156, "eval_rougeLsum": 0.1149, "eval_runtime": 9.499, "eval_samples_per_second": 11.58, "eval_steps_per_second": 1.474, "step": 5582 }, { "epoch": 204.0, "eval_gen_len": 15.5273, "eval_loss": 0.2963531017303467, "eval_rouge1": 0.1251, "eval_rouge2": 0.0846, "eval_rougeL": 0.1207, "eval_rougeLsum": 0.1194, "eval_runtime": 9.5143, "eval_samples_per_second": 11.562, "eval_steps_per_second": 1.471, "step": 5610 }, { "epoch": 204.98, "eval_gen_len": 15.6909, "eval_loss": 0.29531627893447876, "eval_rouge1": 0.1239, "eval_rouge2": 0.0831, "eval_rougeL": 0.1193, "eval_rougeLsum": 0.1184, "eval_runtime": 9.5198, "eval_samples_per_second": 11.555, "eval_steps_per_second": 1.471, "step": 5637 }, { "epoch": 206.0, "eval_gen_len": 15.5273, "eval_loss": 0.29536357522010803, "eval_rouge1": 0.1236, "eval_rouge2": 0.0835, "eval_rougeL": 0.1192, "eval_rougeLsum": 0.1182, "eval_runtime": 9.5066, "eval_samples_per_second": 11.571, "eval_steps_per_second": 1.473, "step": 5665 }, { "epoch": 206.98, "eval_gen_len": 15.6727, "eval_loss": 0.2951861023902893, "eval_rouge1": 0.1236, "eval_rouge2": 0.0832, "eval_rougeL": 0.1191, "eval_rougeLsum": 0.1181, "eval_runtime": 9.5179, "eval_samples_per_second": 11.557, "eval_steps_per_second": 1.471, "step": 5692 }, { "epoch": 208.0, "eval_gen_len": 15.8273, "eval_loss": 0.29457393288612366, "eval_rouge1": 0.1262, "eval_rouge2": 0.0856, "eval_rougeL": 0.1223, "eval_rougeLsum": 0.121, "eval_runtime": 9.5235, "eval_samples_per_second": 11.55, "eval_steps_per_second": 1.47, "step": 5720 }, { "epoch": 208.98, "eval_gen_len": 15.8273, "eval_loss": 0.29461580514907837, "eval_rouge1": 0.1269, "eval_rouge2": 0.086, "eval_rougeL": 0.1227, "eval_rougeLsum": 0.1213, "eval_runtime": 9.5193, "eval_samples_per_second": 11.556, "eval_steps_per_second": 1.471, "step": 5747 }, { "epoch": 210.0, "eval_gen_len": 15.6727, "eval_loss": 0.2948046028614044, "eval_rouge1": 0.1261, "eval_rouge2": 0.0859, "eval_rougeL": 0.1221, "eval_rougeLsum": 0.1208, "eval_runtime": 9.5297, "eval_samples_per_second": 11.543, "eval_steps_per_second": 1.469, "step": 5775 }, { "epoch": 210.98, "eval_gen_len": 15.7636, "eval_loss": 0.29468077421188354, "eval_rouge1": 0.129, "eval_rouge2": 0.0888, "eval_rougeL": 0.1244, "eval_rougeLsum": 0.1235, "eval_runtime": 9.5285, "eval_samples_per_second": 11.544, "eval_steps_per_second": 1.469, "step": 5802 }, { "epoch": 212.0, "eval_gen_len": 15.9091, "eval_loss": 0.2943172752857208, "eval_rouge1": 0.1308, "eval_rouge2": 0.0909, "eval_rougeL": 0.1267, "eval_rougeLsum": 0.1254, "eval_runtime": 9.5333, "eval_samples_per_second": 11.538, "eval_steps_per_second": 1.469, "step": 5830 }, { "epoch": 212.98, "eval_gen_len": 15.9091, "eval_loss": 0.2938406467437744, "eval_rouge1": 0.1293, "eval_rouge2": 0.0888, "eval_rougeL": 0.1251, "eval_rougeLsum": 0.1236, "eval_runtime": 9.5369, "eval_samples_per_second": 11.534, "eval_steps_per_second": 1.468, "step": 5857 }, { "epoch": 214.0, "eval_gen_len": 15.9727, "eval_loss": 0.2933821678161621, "eval_rouge1": 0.1284, "eval_rouge2": 0.0877, "eval_rougeL": 0.1243, "eval_rougeLsum": 0.123, "eval_runtime": 9.5327, "eval_samples_per_second": 11.539, "eval_steps_per_second": 1.469, "step": 5885 }, { "epoch": 214.98, "eval_gen_len": 15.8727, "eval_loss": 0.2933785617351532, "eval_rouge1": 0.1262, "eval_rouge2": 0.0855, "eval_rougeL": 0.1221, "eval_rougeLsum": 0.1208, "eval_runtime": 9.542, "eval_samples_per_second": 11.528, "eval_steps_per_second": 1.467, "step": 5912 }, { "epoch": 216.0, "eval_gen_len": 15.8727, "eval_loss": 0.29337677359580994, "eval_rouge1": 0.1267, "eval_rouge2": 0.0863, "eval_rougeL": 0.1225, "eval_rougeLsum": 0.1214, "eval_runtime": 9.5387, "eval_samples_per_second": 11.532, "eval_steps_per_second": 1.468, "step": 5940 }, { "epoch": 216.98, "eval_gen_len": 15.9636, "eval_loss": 0.2933517396450043, "eval_rouge1": 0.1292, "eval_rouge2": 0.0894, "eval_rougeL": 0.1254, "eval_rougeLsum": 0.1241, "eval_runtime": 9.5629, "eval_samples_per_second": 11.503, "eval_steps_per_second": 1.464, "step": 5967 }, { "epoch": 218.0, "eval_gen_len": 15.9636, "eval_loss": 0.29335835576057434, "eval_rouge1": 0.1289, "eval_rouge2": 0.0889, "eval_rougeL": 0.1249, "eval_rougeLsum": 0.1234, "eval_runtime": 9.5531, "eval_samples_per_second": 11.515, "eval_steps_per_second": 1.465, "step": 5995 }, { "epoch": 218.18, "grad_norm": 0.6804682016372681, "learning_rate": 2.6337448559670784e-07, "loss": 0.3747, "step": 6000 } ], "logging_steps": 500, "max_steps": 6075, "num_input_tokens_seen": 0, "num_train_epochs": 225, "save_steps": 500, "total_flos": 1.1611644167297434e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }