{ "best_metric": null, "best_model_checkpoint": null, "epoch": 181.8181818181818, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.95, "eval_gen_len": 13.1091, "eval_loss": 21.40757179260254, "eval_rouge1": 0.0745, "eval_rouge2": 0.0159, "eval_rougeL": 0.0584, "eval_rougeLsum": 0.0582, "eval_runtime": 11.5492, "eval_samples_per_second": 9.524, "eval_steps_per_second": 1.212, "step": 13 }, { "epoch": 1.96, "eval_gen_len": 12.9636, "eval_loss": 21.254241943359375, "eval_rouge1": 0.0726, "eval_rouge2": 0.0153, "eval_rougeL": 0.0568, "eval_rougeLsum": 0.0569, "eval_runtime": 9.5424, "eval_samples_per_second": 11.528, "eval_steps_per_second": 1.467, "step": 27 }, { "epoch": 2.98, "eval_gen_len": 12.9636, "eval_loss": 21.031164169311523, "eval_rouge1": 0.0724, "eval_rouge2": 0.0176, "eval_rougeL": 0.0568, "eval_rougeLsum": 0.0569, "eval_runtime": 9.5313, "eval_samples_per_second": 11.541, "eval_steps_per_second": 1.469, "step": 41 }, { "epoch": 4.0, "eval_gen_len": 12.6727, "eval_loss": 20.743330001831055, "eval_rouge1": 0.0722, "eval_rouge2": 0.0175, "eval_rougeL": 0.055, "eval_rougeLsum": 0.0551, "eval_runtime": 9.5638, "eval_samples_per_second": 11.502, "eval_steps_per_second": 1.464, "step": 55 }, { "epoch": 4.95, "eval_gen_len": 12.5273, "eval_loss": 20.430522918701172, "eval_rouge1": 0.0708, "eval_rouge2": 0.0177, "eval_rougeL": 0.0545, "eval_rougeLsum": 0.0547, "eval_runtime": 9.583, "eval_samples_per_second": 11.479, "eval_steps_per_second": 1.461, "step": 68 }, { "epoch": 5.96, "eval_gen_len": 12.6091, "eval_loss": 20.10591697692871, "eval_rouge1": 0.0725, "eval_rouge2": 0.0185, "eval_rougeL": 0.0559, "eval_rougeLsum": 0.0559, "eval_runtime": 9.5892, "eval_samples_per_second": 11.471, "eval_steps_per_second": 1.46, "step": 82 }, { "epoch": 6.98, "eval_gen_len": 12.6091, "eval_loss": 19.850391387939453, "eval_rouge1": 0.0727, "eval_rouge2": 0.0169, "eval_rougeL": 0.0551, "eval_rougeLsum": 0.0547, "eval_runtime": 9.6928, "eval_samples_per_second": 11.349, "eval_steps_per_second": 1.444, "step": 96 }, { "epoch": 8.0, "eval_gen_len": 13.2909, "eval_loss": 19.628494262695312, "eval_rouge1": 0.0816, "eval_rouge2": 0.0228, "eval_rougeL": 0.062, "eval_rougeLsum": 0.0622, "eval_runtime": 9.7244, "eval_samples_per_second": 11.312, "eval_steps_per_second": 1.44, "step": 110 }, { "epoch": 8.95, "eval_gen_len": 13.2909, "eval_loss": 19.41258430480957, "eval_rouge1": 0.0811, "eval_rouge2": 0.0214, "eval_rougeL": 0.0613, "eval_rougeLsum": 0.0614, "eval_runtime": 9.5996, "eval_samples_per_second": 11.459, "eval_steps_per_second": 1.458, "step": 123 }, { "epoch": 9.96, "eval_gen_len": 13.5182, "eval_loss": 19.160032272338867, "eval_rouge1": 0.0818, "eval_rouge2": 0.0208, "eval_rougeL": 0.0632, "eval_rougeLsum": 0.0633, "eval_runtime": 9.5959, "eval_samples_per_second": 11.463, "eval_steps_per_second": 1.459, "step": 137 }, { "epoch": 10.98, "eval_gen_len": 13.8909, "eval_loss": 18.8905086517334, "eval_rouge1": 0.09, "eval_rouge2": 0.024, "eval_rougeL": 0.0698, "eval_rougeLsum": 0.0696, "eval_runtime": 9.6532, "eval_samples_per_second": 11.395, "eval_steps_per_second": 1.45, "step": 151 }, { "epoch": 12.0, "eval_gen_len": 14.1818, "eval_loss": 18.593591690063477, "eval_rouge1": 0.094, "eval_rouge2": 0.0324, "eval_rougeL": 0.0735, "eval_rougeLsum": 0.0732, "eval_runtime": 9.6893, "eval_samples_per_second": 11.353, "eval_steps_per_second": 1.445, "step": 165 }, { "epoch": 12.95, "eval_gen_len": 14.4364, "eval_loss": 18.286243438720703, "eval_rouge1": 0.0928, "eval_rouge2": 0.0329, "eval_rougeL": 0.0746, "eval_rougeLsum": 0.0749, "eval_runtime": 9.7163, "eval_samples_per_second": 11.321, "eval_steps_per_second": 1.441, "step": 178 }, { "epoch": 13.96, "eval_gen_len": 15.0727, "eval_loss": 17.896913528442383, "eval_rouge1": 0.096, "eval_rouge2": 0.0328, "eval_rougeL": 0.0788, "eval_rougeLsum": 0.0792, "eval_runtime": 9.6929, "eval_samples_per_second": 11.348, "eval_steps_per_second": 1.444, "step": 192 }, { "epoch": 14.98, "eval_gen_len": 15.7364, "eval_loss": 17.442358016967773, "eval_rouge1": 0.1015, "eval_rouge2": 0.0334, "eval_rougeL": 0.0816, "eval_rougeLsum": 0.0818, "eval_runtime": 9.6767, "eval_samples_per_second": 11.368, "eval_steps_per_second": 1.447, "step": 206 }, { "epoch": 16.0, "eval_gen_len": 16.2364, "eval_loss": 16.91552734375, "eval_rouge1": 0.1001, "eval_rouge2": 0.0337, "eval_rougeL": 0.0811, "eval_rougeLsum": 0.0814, "eval_runtime": 9.5645, "eval_samples_per_second": 11.501, "eval_steps_per_second": 1.464, "step": 220 }, { "epoch": 16.95, "eval_gen_len": 16.6818, "eval_loss": 16.372838973999023, "eval_rouge1": 0.102, "eval_rouge2": 0.0336, "eval_rougeL": 0.081, "eval_rougeLsum": 0.0809, "eval_runtime": 9.7488, "eval_samples_per_second": 11.283, "eval_steps_per_second": 1.436, "step": 233 }, { "epoch": 17.96, "eval_gen_len": 16.5364, "eval_loss": 15.727865219116211, "eval_rouge1": 0.0901, "eval_rouge2": 0.0301, "eval_rougeL": 0.0739, "eval_rougeLsum": 0.0741, "eval_runtime": 9.6417, "eval_samples_per_second": 11.409, "eval_steps_per_second": 1.452, "step": 247 }, { "epoch": 18.98, "eval_gen_len": 16.9818, "eval_loss": 14.980103492736816, "eval_rouge1": 0.0828, "eval_rouge2": 0.0258, "eval_rougeL": 0.0673, "eval_rougeLsum": 0.0671, "eval_runtime": 9.6417, "eval_samples_per_second": 11.409, "eval_steps_per_second": 1.452, "step": 261 }, { "epoch": 20.0, "eval_gen_len": 16.4727, "eval_loss": 14.11096477508545, "eval_rouge1": 0.0687, "eval_rouge2": 0.0146, "eval_rougeL": 0.0549, "eval_rougeLsum": 0.0547, "eval_runtime": 9.556, "eval_samples_per_second": 11.511, "eval_steps_per_second": 1.465, "step": 275 }, { "epoch": 20.95, "eval_gen_len": 14.2364, "eval_loss": 13.337847709655762, "eval_rouge1": 0.051, "eval_rouge2": 0.0102, "eval_rougeL": 0.0418, "eval_rougeLsum": 0.0414, "eval_runtime": 9.7157, "eval_samples_per_second": 11.322, "eval_steps_per_second": 1.441, "step": 288 }, { "epoch": 21.96, "eval_gen_len": 12.3818, "eval_loss": 12.585112571716309, "eval_rouge1": 0.0377, "eval_rouge2": 0.007, "eval_rougeL": 0.0317, "eval_rougeLsum": 0.0317, "eval_runtime": 9.5457, "eval_samples_per_second": 11.524, "eval_steps_per_second": 1.467, "step": 302 }, { "epoch": 22.98, "eval_gen_len": 12.0455, "eval_loss": 11.860977172851562, "eval_rouge1": 0.023, "eval_rouge2": 0.0048, "eval_rougeL": 0.0204, "eval_rougeLsum": 0.0204, "eval_runtime": 9.5524, "eval_samples_per_second": 11.515, "eval_steps_per_second": 1.466, "step": 316 }, { "epoch": 24.0, "eval_gen_len": 10.8545, "eval_loss": 11.155168533325195, "eval_rouge1": 0.012, "eval_rouge2": 0.001, "eval_rougeL": 0.0108, "eval_rougeLsum": 0.0108, "eval_runtime": 9.5834, "eval_samples_per_second": 11.478, "eval_steps_per_second": 1.461, "step": 330 }, { "epoch": 24.95, "eval_gen_len": 10.0273, "eval_loss": 10.512735366821289, "eval_rouge1": 0.0037, "eval_rouge2": 0.0005, "eval_rougeL": 0.0035, "eval_rougeLsum": 0.0036, "eval_runtime": 9.613, "eval_samples_per_second": 11.443, "eval_steps_per_second": 1.456, "step": 343 }, { "epoch": 25.96, "eval_gen_len": 11.6, "eval_loss": 9.83348560333252, "eval_rouge1": 0.0039, "eval_rouge2": 0.0002, "eval_rougeL": 0.0038, "eval_rougeLsum": 0.0039, "eval_runtime": 9.5253, "eval_samples_per_second": 11.548, "eval_steps_per_second": 1.47, "step": 357 }, { "epoch": 26.98, "eval_gen_len": 13.0455, "eval_loss": 9.216172218322754, "eval_rouge1": 0.0016, "eval_rouge2": 0.0, "eval_rougeL": 0.0016, "eval_rougeLsum": 0.0016, "eval_runtime": 9.5441, "eval_samples_per_second": 11.525, "eval_steps_per_second": 1.467, "step": 371 }, { "epoch": 28.0, "eval_gen_len": 14.6818, "eval_loss": 8.572382926940918, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.6018, "eval_samples_per_second": 11.456, "eval_steps_per_second": 1.458, "step": 385 }, { "epoch": 28.95, "eval_gen_len": 15.2727, "eval_loss": 8.037731170654297, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0009, "eval_rougeLsum": 0.0009, "eval_runtime": 9.5646, "eval_samples_per_second": 11.501, "eval_steps_per_second": 1.464, "step": 398 }, { "epoch": 29.96, "eval_gen_len": 16.3909, "eval_loss": 7.415070056915283, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.5548, "eval_samples_per_second": 11.513, "eval_steps_per_second": 1.465, "step": 412 }, { "epoch": 30.98, "eval_gen_len": 17.8364, "eval_loss": 6.802865028381348, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5629, "eval_samples_per_second": 11.503, "eval_steps_per_second": 1.464, "step": 426 }, { "epoch": 32.0, "eval_gen_len": 18.2818, "eval_loss": 6.211207866668701, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5163, "eval_samples_per_second": 11.559, "eval_steps_per_second": 1.471, "step": 440 }, { "epoch": 32.95, "eval_gen_len": 18.7091, "eval_loss": 5.717328071594238, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.505, "eval_samples_per_second": 11.573, "eval_steps_per_second": 1.473, "step": 453 }, { "epoch": 33.96, "eval_gen_len": 18.7091, "eval_loss": 5.17288064956665, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.5443, "eval_samples_per_second": 11.525, "eval_steps_per_second": 1.467, "step": 467 }, { "epoch": 34.98, "eval_gen_len": 19.0, "eval_loss": 4.669548034667969, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0008, "eval_runtime": 9.4845, "eval_samples_per_second": 11.598, "eval_steps_per_second": 1.476, "step": 481 }, { "epoch": 36.0, "eval_gen_len": 19.0, "eval_loss": 4.216309547424316, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.5515, "eval_samples_per_second": 11.516, "eval_steps_per_second": 1.466, "step": 495 }, { "epoch": 36.36, "grad_norm": 5.376431465148926, "learning_rate": 1.660854700854701e-05, "loss": 14.4939, "step": 500 }, { "epoch": 36.95, "eval_gen_len": 19.0, "eval_loss": 3.8450570106506348, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5449, "eval_samples_per_second": 11.525, "eval_steps_per_second": 1.467, "step": 508 }, { "epoch": 37.96, "eval_gen_len": 19.0, "eval_loss": 3.507812261581421, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.5902, "eval_samples_per_second": 11.47, "eval_steps_per_second": 1.46, "step": 522 }, { "epoch": 38.98, "eval_gen_len": 19.0, "eval_loss": 3.231505870819092, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5074, "eval_samples_per_second": 11.57, "eval_steps_per_second": 1.473, "step": 536 }, { "epoch": 40.0, "eval_gen_len": 19.0, "eval_loss": 2.9967288970947266, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5195, "eval_samples_per_second": 11.555, "eval_steps_per_second": 1.471, "step": 550 }, { "epoch": 40.95, "eval_gen_len": 18.9545, "eval_loss": 2.8089849948883057, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5224, "eval_samples_per_second": 11.552, "eval_steps_per_second": 1.47, "step": 563 }, { "epoch": 41.96, "eval_gen_len": 18.3727, "eval_loss": 2.6384663581848145, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.5129, "eval_samples_per_second": 11.563, "eval_steps_per_second": 1.472, "step": 577 }, { "epoch": 42.98, "eval_gen_len": 16.7273, "eval_loss": 2.492598295211792, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.5174, "eval_samples_per_second": 11.558, "eval_steps_per_second": 1.471, "step": 591 }, { "epoch": 44.0, "eval_gen_len": 11.9636, "eval_loss": 2.3677501678466797, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.5796, "eval_samples_per_second": 11.483, "eval_steps_per_second": 1.461, "step": 605 }, { "epoch": 44.95, "eval_gen_len": 9.2455, "eval_loss": 2.2777955532073975, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6685, "eval_samples_per_second": 11.377, "eval_steps_per_second": 1.448, "step": 618 }, { "epoch": 45.96, "eval_gen_len": 7.9455, "eval_loss": 2.198147773742676, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.585, "eval_samples_per_second": 11.476, "eval_steps_per_second": 1.461, "step": 632 }, { "epoch": 46.98, "eval_gen_len": 7.5909, "eval_loss": 2.1306562423706055, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6008, "eval_samples_per_second": 11.457, "eval_steps_per_second": 1.458, "step": 646 }, { "epoch": 48.0, "eval_gen_len": 7.4091, "eval_loss": 2.0773117542266846, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.6506, "eval_samples_per_second": 11.398, "eval_steps_per_second": 1.451, "step": 660 }, { "epoch": 48.95, "eval_gen_len": 7.2909, "eval_loss": 2.036808729171753, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5097, "eval_samples_per_second": 11.567, "eval_steps_per_second": 1.472, "step": 673 }, { "epoch": 49.96, "eval_gen_len": 6.8364, "eval_loss": 1.9949748516082764, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6031, "eval_samples_per_second": 11.455, "eval_steps_per_second": 1.458, "step": 687 }, { "epoch": 50.98, "eval_gen_len": 7.8273, "eval_loss": 1.957520842552185, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.5071, "eval_samples_per_second": 11.57, "eval_steps_per_second": 1.473, "step": 701 }, { "epoch": 52.0, "eval_gen_len": 7.5545, "eval_loss": 1.9219788312911987, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5836, "eval_samples_per_second": 11.478, "eval_steps_per_second": 1.461, "step": 715 }, { "epoch": 52.95, "eval_gen_len": 7.5364, "eval_loss": 1.8916202783584595, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6019, "eval_samples_per_second": 11.456, "eval_steps_per_second": 1.458, "step": 728 }, { "epoch": 53.96, "eval_gen_len": 7.1182, "eval_loss": 1.8674402236938477, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5024, "eval_samples_per_second": 11.576, "eval_steps_per_second": 1.473, "step": 742 }, { "epoch": 54.98, "eval_gen_len": 7.0364, "eval_loss": 1.846158742904663, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6139, "eval_samples_per_second": 11.442, "eval_steps_per_second": 1.456, "step": 756 }, { "epoch": 56.0, "eval_gen_len": 7.0, "eval_loss": 1.827086329460144, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.591, "eval_samples_per_second": 11.469, "eval_steps_per_second": 1.46, "step": 770 }, { "epoch": 56.95, "eval_gen_len": 7.5455, "eval_loss": 1.8088210821151733, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.572, "eval_samples_per_second": 11.492, "eval_steps_per_second": 1.463, "step": 783 }, { "epoch": 57.96, "eval_gen_len": 7.9, "eval_loss": 1.789602279663086, "eval_rouge1": 0.0001, "eval_rouge2": 0.0, "eval_rougeL": 0.0001, "eval_rougeLsum": 0.0001, "eval_runtime": 9.5837, "eval_samples_per_second": 11.478, "eval_steps_per_second": 1.461, "step": 797 }, { "epoch": 58.98, "eval_gen_len": 8.2545, "eval_loss": 1.7687013149261475, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.6152, "eval_samples_per_second": 11.44, "eval_steps_per_second": 1.456, "step": 811 }, { "epoch": 60.0, "eval_gen_len": 8.3636, "eval_loss": 1.7496564388275146, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4902, "eval_samples_per_second": 11.591, "eval_steps_per_second": 1.475, "step": 825 }, { "epoch": 60.95, "eval_gen_len": 9.1455, "eval_loss": 1.7332907915115356, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.5867, "eval_samples_per_second": 11.474, "eval_steps_per_second": 1.46, "step": 838 }, { "epoch": 61.96, "eval_gen_len": 8.9, "eval_loss": 1.7185932397842407, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4797, "eval_samples_per_second": 11.604, "eval_steps_per_second": 1.477, "step": 852 }, { "epoch": 62.98, "eval_gen_len": 9.7545, "eval_loss": 1.7047526836395264, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.618, "eval_samples_per_second": 11.437, "eval_steps_per_second": 1.456, "step": 866 }, { "epoch": 64.0, "eval_gen_len": 9.9818, "eval_loss": 1.6921414136886597, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6264, "eval_samples_per_second": 11.427, "eval_steps_per_second": 1.454, "step": 880 }, { "epoch": 64.95, "eval_gen_len": 9.6909, "eval_loss": 1.6816327571868896, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5134, "eval_samples_per_second": 11.563, "eval_steps_per_second": 1.472, "step": 893 }, { "epoch": 65.96, "eval_gen_len": 8.9545, "eval_loss": 1.6697723865509033, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5506, "eval_samples_per_second": 11.518, "eval_steps_per_second": 1.466, "step": 907 }, { "epoch": 66.98, "eval_gen_len": 9.6818, "eval_loss": 1.6568113565444946, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5388, "eval_samples_per_second": 11.532, "eval_steps_per_second": 1.468, "step": 921 }, { "epoch": 68.0, "eval_gen_len": 9.9455, "eval_loss": 1.6469463109970093, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4825, "eval_samples_per_second": 11.6, "eval_steps_per_second": 1.476, "step": 935 }, { "epoch": 68.95, "eval_gen_len": 9.3545, "eval_loss": 1.6408612728118896, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.5212, "eval_samples_per_second": 11.553, "eval_steps_per_second": 1.47, "step": 948 }, { "epoch": 69.96, "eval_gen_len": 9.1545, "eval_loss": 1.6316603422164917, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4986, "eval_samples_per_second": 11.581, "eval_steps_per_second": 1.474, "step": 962 }, { "epoch": 70.98, "eval_gen_len": 9.7818, "eval_loss": 1.623169183731079, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5276, "eval_samples_per_second": 11.545, "eval_steps_per_second": 1.469, "step": 976 }, { "epoch": 72.0, "eval_gen_len": 10.0273, "eval_loss": 1.6152759790420532, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5801, "eval_samples_per_second": 11.482, "eval_steps_per_second": 1.461, "step": 990 }, { "epoch": 72.73, "grad_norm": 3.344996213912964, "learning_rate": 1.3196581196581197e-05, "loss": 2.6089, "step": 1000 }, { "epoch": 72.95, "eval_gen_len": 9.1727, "eval_loss": 1.6071548461914062, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.6124, "eval_samples_per_second": 11.444, "eval_steps_per_second": 1.456, "step": 1003 }, { "epoch": 73.96, "eval_gen_len": 9.1545, "eval_loss": 1.599768042564392, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.498, "eval_samples_per_second": 11.581, "eval_steps_per_second": 1.474, "step": 1017 }, { "epoch": 74.98, "eval_gen_len": 9.5273, "eval_loss": 1.5934444665908813, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5664, "eval_samples_per_second": 11.499, "eval_steps_per_second": 1.463, "step": 1031 }, { "epoch": 76.0, "eval_gen_len": 10.4091, "eval_loss": 1.5867650508880615, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.579, "eval_samples_per_second": 11.483, "eval_steps_per_second": 1.462, "step": 1045 }, { "epoch": 76.95, "eval_gen_len": 10.3, "eval_loss": 1.5827070474624634, "eval_rouge1": 0.0005, "eval_rouge2": 0.0003, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.4875, "eval_samples_per_second": 11.594, "eval_steps_per_second": 1.476, "step": 1058 }, { "epoch": 77.96, "eval_gen_len": 9.6182, "eval_loss": 1.5748482942581177, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.4782, "eval_samples_per_second": 11.606, "eval_steps_per_second": 1.477, "step": 1072 }, { "epoch": 78.98, "eval_gen_len": 9.6273, "eval_loss": 1.5662298202514648, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.4778, "eval_samples_per_second": 11.606, "eval_steps_per_second": 1.477, "step": 1086 }, { "epoch": 80.0, "eval_gen_len": 9.5273, "eval_loss": 1.5585095882415771, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.4851, "eval_samples_per_second": 11.597, "eval_steps_per_second": 1.476, "step": 1100 }, { "epoch": 80.95, "eval_gen_len": 10.1364, "eval_loss": 1.5539778470993042, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6048, "eval_samples_per_second": 11.453, "eval_steps_per_second": 1.458, "step": 1113 }, { "epoch": 81.96, "eval_gen_len": 9.6182, "eval_loss": 1.541092038154602, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4829, "eval_samples_per_second": 11.6, "eval_steps_per_second": 1.476, "step": 1127 }, { "epoch": 82.98, "eval_gen_len": 9.6091, "eval_loss": 1.5330486297607422, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5558, "eval_samples_per_second": 11.511, "eval_steps_per_second": 1.465, "step": 1141 }, { "epoch": 84.0, "eval_gen_len": 9.0818, "eval_loss": 1.5241070985794067, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6145, "eval_samples_per_second": 11.441, "eval_steps_per_second": 1.456, "step": 1155 }, { "epoch": 84.95, "eval_gen_len": 9.3, "eval_loss": 1.5192241668701172, "eval_rouge1": 0.0004, "eval_rouge2": 0.0, "eval_rougeL": 0.0004, "eval_rougeLsum": 0.0004, "eval_runtime": 9.4781, "eval_samples_per_second": 11.606, "eval_steps_per_second": 1.477, "step": 1168 }, { "epoch": 85.96, "eval_gen_len": 9.5364, "eval_loss": 1.5117179155349731, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5291, "eval_samples_per_second": 11.544, "eval_steps_per_second": 1.469, "step": 1182 }, { "epoch": 86.98, "eval_gen_len": 9.4545, "eval_loss": 1.4990766048431396, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.6213, "eval_samples_per_second": 11.433, "eval_steps_per_second": 1.455, "step": 1196 }, { "epoch": 88.0, "eval_gen_len": 9.4182, "eval_loss": 1.4878661632537842, "eval_rouge1": 0.0011, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.4886, "eval_samples_per_second": 11.593, "eval_steps_per_second": 1.475, "step": 1210 }, { "epoch": 88.95, "eval_gen_len": 9.5727, "eval_loss": 1.479432463645935, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.5867, "eval_samples_per_second": 11.474, "eval_steps_per_second": 1.46, "step": 1223 }, { "epoch": 89.96, "eval_gen_len": 9.3909, "eval_loss": 1.471197247505188, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6139, "eval_samples_per_second": 11.442, "eval_steps_per_second": 1.456, "step": 1237 }, { "epoch": 90.98, "eval_gen_len": 9.5, "eval_loss": 1.4649511575698853, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.5947, "eval_samples_per_second": 11.465, "eval_steps_per_second": 1.459, "step": 1251 }, { "epoch": 92.0, "eval_gen_len": 9.5273, "eval_loss": 1.4548052549362183, "eval_rouge1": 0.0016, "eval_rouge2": 0.0004, "eval_rougeL": 0.0016, "eval_rougeLsum": 0.0017, "eval_runtime": 9.6868, "eval_samples_per_second": 11.356, "eval_steps_per_second": 1.445, "step": 1265 }, { "epoch": 92.95, "eval_gen_len": 9.3727, "eval_loss": 1.444838285446167, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5071, "eval_samples_per_second": 11.57, "eval_steps_per_second": 1.473, "step": 1278 }, { "epoch": 93.96, "eval_gen_len": 9.6, "eval_loss": 1.4365838766098022, "eval_rouge1": 0.0014, "eval_rouge2": 0.0004, "eval_rougeL": 0.0014, "eval_rougeLsum": 0.0015, "eval_runtime": 9.5507, "eval_samples_per_second": 11.517, "eval_steps_per_second": 1.466, "step": 1292 }, { "epoch": 94.98, "eval_gen_len": 9.3364, "eval_loss": 1.4285393953323364, "eval_rouge1": 0.0006, "eval_rouge2": 0.0003, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.6323, "eval_samples_per_second": 11.42, "eval_steps_per_second": 1.453, "step": 1306 }, { "epoch": 96.0, "eval_gen_len": 9.3455, "eval_loss": 1.4242411851882935, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0008, "eval_runtime": 9.5163, "eval_samples_per_second": 11.559, "eval_steps_per_second": 1.471, "step": 1320 }, { "epoch": 96.95, "eval_gen_len": 9.4, "eval_loss": 1.4160754680633545, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.5868, "eval_samples_per_second": 11.474, "eval_steps_per_second": 1.46, "step": 1333 }, { "epoch": 97.96, "eval_gen_len": 9.4455, "eval_loss": 1.4052343368530273, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.6109, "eval_samples_per_second": 11.445, "eval_steps_per_second": 1.457, "step": 1347 }, { "epoch": 98.98, "eval_gen_len": 9.5273, "eval_loss": 1.3928413391113281, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.6591, "eval_samples_per_second": 11.388, "eval_steps_per_second": 1.449, "step": 1361 }, { "epoch": 100.0, "eval_gen_len": 9.5182, "eval_loss": 1.3840864896774292, "eval_rouge1": 0.0011, "eval_rouge2": 0.0, "eval_rougeL": 0.0011, "eval_rougeLsum": 0.0011, "eval_runtime": 9.6661, "eval_samples_per_second": 11.38, "eval_steps_per_second": 1.448, "step": 1375 }, { "epoch": 100.95, "eval_gen_len": 9.3, "eval_loss": 1.381872296333313, "eval_rouge1": 0.0006, "eval_rouge2": 0.0001, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.6513, "eval_samples_per_second": 11.397, "eval_steps_per_second": 1.451, "step": 1388 }, { "epoch": 101.96, "eval_gen_len": 9.3455, "eval_loss": 1.37588632106781, "eval_rouge1": 0.0004, "eval_rouge2": 0.0, "eval_rougeL": 0.0004, "eval_rougeLsum": 0.0004, "eval_runtime": 9.5607, "eval_samples_per_second": 11.505, "eval_steps_per_second": 1.464, "step": 1402 }, { "epoch": 102.98, "eval_gen_len": 9.2636, "eval_loss": 1.3675447702407837, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5449, "eval_samples_per_second": 11.524, "eval_steps_per_second": 1.467, "step": 1416 }, { "epoch": 104.0, "eval_gen_len": 9.2455, "eval_loss": 1.358955979347229, "eval_rouge1": 0.0012, "eval_rouge2": 0.0, "eval_rougeL": 0.0012, "eval_rougeLsum": 0.0012, "eval_runtime": 9.5868, "eval_samples_per_second": 11.474, "eval_steps_per_second": 1.46, "step": 1430 }, { "epoch": 104.95, "eval_gen_len": 9.1455, "eval_loss": 1.3501193523406982, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.6684, "eval_samples_per_second": 11.377, "eval_steps_per_second": 1.448, "step": 1443 }, { "epoch": 105.96, "eval_gen_len": 9.1636, "eval_loss": 1.344258189201355, "eval_rouge1": 0.0007, "eval_rouge2": 0.0003, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.5388, "eval_samples_per_second": 11.532, "eval_steps_per_second": 1.468, "step": 1457 }, { "epoch": 106.98, "eval_gen_len": 9.8, "eval_loss": 1.3355817794799805, "eval_rouge1": 0.001, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.5263, "eval_samples_per_second": 11.547, "eval_steps_per_second": 1.47, "step": 1471 }, { "epoch": 108.0, "eval_gen_len": 9.9182, "eval_loss": 1.3305474519729614, "eval_rouge1": 0.0008, "eval_rouge2": 0.0005, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.6315, "eval_samples_per_second": 11.421, "eval_steps_per_second": 1.454, "step": 1485 }, { "epoch": 108.95, "eval_gen_len": 10.1636, "eval_loss": 1.3212946653366089, "eval_rouge1": 0.0028, "eval_rouge2": 0.001, "eval_rougeL": 0.0025, "eval_rougeLsum": 0.0026, "eval_runtime": 9.5797, "eval_samples_per_second": 11.483, "eval_steps_per_second": 1.461, "step": 1498 }, { "epoch": 109.09, "grad_norm": 5.480063438415527, "learning_rate": 9.784615384615387e-06, "loss": 1.7753, "step": 1500 }, { "epoch": 109.96, "eval_gen_len": 9.9091, "eval_loss": 1.31065833568573, "eval_rouge1": 0.0019, "eval_rouge2": 0.0, "eval_rougeL": 0.0015, "eval_rougeLsum": 0.0016, "eval_runtime": 9.57, "eval_samples_per_second": 11.494, "eval_steps_per_second": 1.463, "step": 1512 }, { "epoch": 110.98, "eval_gen_len": 10.2, "eval_loss": 1.301637053489685, "eval_rouge1": 0.0015, "eval_rouge2": 0.0, "eval_rougeL": 0.0013, "eval_rougeLsum": 0.0013, "eval_runtime": 9.5009, "eval_samples_per_second": 11.578, "eval_steps_per_second": 1.474, "step": 1526 }, { "epoch": 112.0, "eval_gen_len": 9.7091, "eval_loss": 1.2922732830047607, "eval_rouge1": 0.0014, "eval_rouge2": 0.0, "eval_rougeL": 0.0013, "eval_rougeLsum": 0.0014, "eval_runtime": 9.5171, "eval_samples_per_second": 11.558, "eval_steps_per_second": 1.471, "step": 1540 }, { "epoch": 112.95, "eval_gen_len": 9.6273, "eval_loss": 1.2817051410675049, "eval_rouge1": 0.001, "eval_rouge2": 0.0, "eval_rougeL": 0.001, "eval_rougeLsum": 0.001, "eval_runtime": 9.5296, "eval_samples_per_second": 11.543, "eval_steps_per_second": 1.469, "step": 1553 }, { "epoch": 113.96, "eval_gen_len": 9.9818, "eval_loss": 1.2726068496704102, "eval_rouge1": 0.001, "eval_rouge2": 0.0, "eval_rougeL": 0.001, "eval_rougeLsum": 0.001, "eval_runtime": 9.6467, "eval_samples_per_second": 11.403, "eval_steps_per_second": 1.451, "step": 1567 }, { "epoch": 114.98, "eval_gen_len": 9.5273, "eval_loss": 1.2626111507415771, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.5006, "eval_samples_per_second": 11.578, "eval_steps_per_second": 1.474, "step": 1581 }, { "epoch": 116.0, "eval_gen_len": 9.8, "eval_loss": 1.2526050806045532, "eval_rouge1": 0.0026, "eval_rouge2": 0.0006, "eval_rougeL": 0.0019, "eval_rougeLsum": 0.0019, "eval_runtime": 9.5945, "eval_samples_per_second": 11.465, "eval_steps_per_second": 1.459, "step": 1595 }, { "epoch": 116.95, "eval_gen_len": 9.6182, "eval_loss": 1.2438002824783325, "eval_rouge1": 0.0012, "eval_rouge2": 0.0, "eval_rougeL": 0.0012, "eval_rougeLsum": 0.0012, "eval_runtime": 9.582, "eval_samples_per_second": 11.48, "eval_steps_per_second": 1.461, "step": 1608 }, { "epoch": 117.96, "eval_gen_len": 9.5727, "eval_loss": 1.235589861869812, "eval_rouge1": 0.0016, "eval_rouge2": 0.0, "eval_rougeL": 0.0013, "eval_rougeLsum": 0.0013, "eval_runtime": 9.5074, "eval_samples_per_second": 11.57, "eval_steps_per_second": 1.473, "step": 1622 }, { "epoch": 118.98, "eval_gen_len": 9.5727, "eval_loss": 1.2265100479125977, "eval_rouge1": 0.0017, "eval_rouge2": 0.0, "eval_rougeL": 0.0017, "eval_rougeLsum": 0.0017, "eval_runtime": 9.6164, "eval_samples_per_second": 11.439, "eval_steps_per_second": 1.456, "step": 1636 }, { "epoch": 120.0, "eval_gen_len": 9.7182, "eval_loss": 1.21653151512146, "eval_rouge1": 0.0013, "eval_rouge2": 0.0, "eval_rougeL": 0.0011, "eval_rougeLsum": 0.0011, "eval_runtime": 9.5387, "eval_samples_per_second": 11.532, "eval_steps_per_second": 1.468, "step": 1650 }, { "epoch": 120.95, "eval_gen_len": 9.7182, "eval_loss": 1.208518624305725, "eval_rouge1": 0.0018, "eval_rouge2": 0.0, "eval_rougeL": 0.0014, "eval_rougeLsum": 0.0015, "eval_runtime": 9.6008, "eval_samples_per_second": 11.457, "eval_steps_per_second": 1.458, "step": 1663 }, { "epoch": 121.96, "eval_gen_len": 9.4182, "eval_loss": 1.1974164247512817, "eval_rouge1": 0.0024, "eval_rouge2": 0.0007, "eval_rougeL": 0.0022, "eval_rougeLsum": 0.0022, "eval_runtime": 9.5182, "eval_samples_per_second": 11.557, "eval_steps_per_second": 1.471, "step": 1677 }, { "epoch": 122.98, "eval_gen_len": 9.3273, "eval_loss": 1.188578486442566, "eval_rouge1": 0.0013, "eval_rouge2": 0.0003, "eval_rougeL": 0.0013, "eval_rougeLsum": 0.0009, "eval_runtime": 9.5875, "eval_samples_per_second": 11.473, "eval_steps_per_second": 1.46, "step": 1691 }, { "epoch": 124.0, "eval_gen_len": 9.3727, "eval_loss": 1.1796928644180298, "eval_rouge1": 0.0018, "eval_rouge2": 0.0006, "eval_rougeL": 0.0018, "eval_rougeLsum": 0.0018, "eval_runtime": 9.5197, "eval_samples_per_second": 11.555, "eval_steps_per_second": 1.471, "step": 1705 }, { "epoch": 124.95, "eval_gen_len": 9.4091, "eval_loss": 1.1718236207962036, "eval_rouge1": 0.0021, "eval_rouge2": 0.0006, "eval_rougeL": 0.0018, "eval_rougeLsum": 0.0018, "eval_runtime": 9.5197, "eval_samples_per_second": 11.555, "eval_steps_per_second": 1.471, "step": 1718 }, { "epoch": 125.96, "eval_gen_len": 9.3364, "eval_loss": 1.1624401807785034, "eval_rouge1": 0.0026, "eval_rouge2": 0.0007, "eval_rougeL": 0.0023, "eval_rougeLsum": 0.0023, "eval_runtime": 9.5347, "eval_samples_per_second": 11.537, "eval_steps_per_second": 1.468, "step": 1732 }, { "epoch": 126.98, "eval_gen_len": 9.6364, "eval_loss": 1.1539288759231567, "eval_rouge1": 0.0041, "eval_rouge2": 0.0011, "eval_rougeL": 0.0032, "eval_rougeLsum": 0.0032, "eval_runtime": 9.4835, "eval_samples_per_second": 11.599, "eval_steps_per_second": 1.476, "step": 1746 }, { "epoch": 128.0, "eval_gen_len": 9.5364, "eval_loss": 1.1447480916976929, "eval_rouge1": 0.0018, "eval_rouge2": 0.0, "eval_rougeL": 0.0017, "eval_rougeLsum": 0.0016, "eval_runtime": 9.5263, "eval_samples_per_second": 11.547, "eval_steps_per_second": 1.47, "step": 1760 }, { "epoch": 128.95, "eval_gen_len": 9.4545, "eval_loss": 1.1359179019927979, "eval_rouge1": 0.0026, "eval_rouge2": 0.0, "eval_rougeL": 0.0021, "eval_rougeLsum": 0.0021, "eval_runtime": 9.5577, "eval_samples_per_second": 11.509, "eval_steps_per_second": 1.465, "step": 1773 }, { "epoch": 129.96, "eval_gen_len": 9.3636, "eval_loss": 1.1250239610671997, "eval_rouge1": 0.0029, "eval_rouge2": 0.0, "eval_rougeL": 0.0026, "eval_rougeLsum": 0.0025, "eval_runtime": 9.5118, "eval_samples_per_second": 11.565, "eval_steps_per_second": 1.472, "step": 1787 }, { "epoch": 130.98, "eval_gen_len": 9.4364, "eval_loss": 1.1156790256500244, "eval_rouge1": 0.0034, "eval_rouge2": 0.0005, "eval_rougeL": 0.0031, "eval_rougeLsum": 0.003, "eval_runtime": 9.4869, "eval_samples_per_second": 11.595, "eval_steps_per_second": 1.476, "step": 1801 }, { "epoch": 132.0, "eval_gen_len": 9.5182, "eval_loss": 1.1065722703933716, "eval_rouge1": 0.0029, "eval_rouge2": 0.0003, "eval_rougeL": 0.0027, "eval_rougeLsum": 0.0026, "eval_runtime": 9.6053, "eval_samples_per_second": 11.452, "eval_steps_per_second": 1.458, "step": 1815 }, { "epoch": 132.95, "eval_gen_len": 9.6636, "eval_loss": 1.0981847047805786, "eval_rouge1": 0.0032, "eval_rouge2": 0.0003, "eval_rougeL": 0.0034, "eval_rougeLsum": 0.0032, "eval_runtime": 9.6558, "eval_samples_per_second": 11.392, "eval_steps_per_second": 1.45, "step": 1828 }, { "epoch": 133.96, "eval_gen_len": 9.7273, "eval_loss": 1.090613603591919, "eval_rouge1": 0.0032, "eval_rouge2": 0.0006, "eval_rougeL": 0.0031, "eval_rougeLsum": 0.0029, "eval_runtime": 9.6549, "eval_samples_per_second": 11.393, "eval_steps_per_second": 1.45, "step": 1842 }, { "epoch": 134.98, "eval_gen_len": 10.0818, "eval_loss": 1.0842803716659546, "eval_rouge1": 0.0061, "eval_rouge2": 0.0008, "eval_rougeL": 0.0052, "eval_rougeLsum": 0.0052, "eval_runtime": 9.4978, "eval_samples_per_second": 11.582, "eval_steps_per_second": 1.474, "step": 1856 }, { "epoch": 136.0, "eval_gen_len": 9.5818, "eval_loss": 1.075701117515564, "eval_rouge1": 0.0046, "eval_rouge2": 0.0011, "eval_rougeL": 0.0042, "eval_rougeLsum": 0.0042, "eval_runtime": 9.6101, "eval_samples_per_second": 11.446, "eval_steps_per_second": 1.457, "step": 1870 }, { "epoch": 136.95, "eval_gen_len": 9.8273, "eval_loss": 1.0663608312606812, "eval_rouge1": 0.0048, "eval_rouge2": 0.0009, "eval_rougeL": 0.0043, "eval_rougeLsum": 0.0043, "eval_runtime": 9.6501, "eval_samples_per_second": 11.399, "eval_steps_per_second": 1.451, "step": 1883 }, { "epoch": 137.96, "eval_gen_len": 9.8545, "eval_loss": 1.0555903911590576, "eval_rouge1": 0.0055, "eval_rouge2": 0.0007, "eval_rougeL": 0.0046, "eval_rougeLsum": 0.0045, "eval_runtime": 9.5822, "eval_samples_per_second": 11.48, "eval_steps_per_second": 1.461, "step": 1897 }, { "epoch": 138.98, "eval_gen_len": 9.9182, "eval_loss": 1.0459803342819214, "eval_rouge1": 0.0067, "eval_rouge2": 0.0007, "eval_rougeL": 0.0063, "eval_rougeLsum": 0.0062, "eval_runtime": 9.659, "eval_samples_per_second": 11.388, "eval_steps_per_second": 1.449, "step": 1911 }, { "epoch": 140.0, "eval_gen_len": 10.1636, "eval_loss": 1.0374290943145752, "eval_rouge1": 0.0067, "eval_rouge2": 0.0017, "eval_rougeL": 0.0062, "eval_rougeLsum": 0.0061, "eval_runtime": 9.5201, "eval_samples_per_second": 11.555, "eval_steps_per_second": 1.471, "step": 1925 }, { "epoch": 140.95, "eval_gen_len": 9.8909, "eval_loss": 1.0285921096801758, "eval_rouge1": 0.0037, "eval_rouge2": 0.001, "eval_rougeL": 0.0034, "eval_rougeLsum": 0.0034, "eval_runtime": 9.5989, "eval_samples_per_second": 11.46, "eval_steps_per_second": 1.458, "step": 1938 }, { "epoch": 141.96, "eval_gen_len": 9.6455, "eval_loss": 1.0195242166519165, "eval_rouge1": 0.0033, "eval_rouge2": 0.0009, "eval_rougeL": 0.0028, "eval_rougeLsum": 0.0027, "eval_runtime": 9.5173, "eval_samples_per_second": 11.558, "eval_steps_per_second": 1.471, "step": 1952 }, { "epoch": 142.98, "eval_gen_len": 9.8182, "eval_loss": 1.0105475187301636, "eval_rouge1": 0.0031, "eval_rouge2": 0.0009, "eval_rougeL": 0.0028, "eval_rougeLsum": 0.0028, "eval_runtime": 9.5853, "eval_samples_per_second": 11.476, "eval_steps_per_second": 1.461, "step": 1966 }, { "epoch": 144.0, "eval_gen_len": 9.5091, "eval_loss": 1.001856803894043, "eval_rouge1": 0.0025, "eval_rouge2": 0.0005, "eval_rougeL": 0.0022, "eval_rougeLsum": 0.002, "eval_runtime": 9.5828, "eval_samples_per_second": 11.479, "eval_steps_per_second": 1.461, "step": 1980 }, { "epoch": 144.95, "eval_gen_len": 9.6909, "eval_loss": 0.9937859773635864, "eval_rouge1": 0.0023, "eval_rouge2": 0.0009, "eval_rougeL": 0.0023, "eval_rougeLsum": 0.0023, "eval_runtime": 9.5054, "eval_samples_per_second": 11.572, "eval_steps_per_second": 1.473, "step": 1993 }, { "epoch": 145.45, "grad_norm": 3.3901820182800293, "learning_rate": 6.365811965811967e-06, "loss": 1.4532, "step": 2000 }, { "epoch": 145.96, "eval_gen_len": 9.4182, "eval_loss": 0.985722541809082, "eval_rouge1": 0.0023, "eval_rouge2": 0.0009, "eval_rougeL": 0.0023, "eval_rougeLsum": 0.0023, "eval_runtime": 9.5248, "eval_samples_per_second": 11.549, "eval_steps_per_second": 1.47, "step": 2007 }, { "epoch": 146.98, "eval_gen_len": 9.6818, "eval_loss": 0.978095531463623, "eval_rouge1": 0.0028, "eval_rouge2": 0.0009, "eval_rougeL": 0.0028, "eval_rougeLsum": 0.0028, "eval_runtime": 9.5123, "eval_samples_per_second": 11.564, "eval_steps_per_second": 1.472, "step": 2021 }, { "epoch": 148.0, "eval_gen_len": 9.7636, "eval_loss": 0.9693424105644226, "eval_rouge1": 0.0023, "eval_rouge2": 0.0009, "eval_rougeL": 0.0023, "eval_rougeLsum": 0.0023, "eval_runtime": 9.6266, "eval_samples_per_second": 11.427, "eval_steps_per_second": 1.454, "step": 2035 }, { "epoch": 148.95, "eval_gen_len": 9.7545, "eval_loss": 0.9614344835281372, "eval_rouge1": 0.0016, "eval_rouge2": 0.0002, "eval_rougeL": 0.0017, "eval_rougeLsum": 0.0016, "eval_runtime": 9.5312, "eval_samples_per_second": 11.541, "eval_steps_per_second": 1.469, "step": 2048 }, { "epoch": 149.96, "eval_gen_len": 9.7545, "eval_loss": 0.9523000121116638, "eval_rouge1": 0.0025, "eval_rouge2": 0.0009, "eval_rougeL": 0.0026, "eval_rougeLsum": 0.0028, "eval_runtime": 9.5972, "eval_samples_per_second": 11.462, "eval_steps_per_second": 1.459, "step": 2062 }, { "epoch": 150.98, "eval_gen_len": 9.7, "eval_loss": 0.9436053037643433, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0009, "eval_rougeLsum": 0.001, "eval_runtime": 9.5824, "eval_samples_per_second": 11.479, "eval_steps_per_second": 1.461, "step": 2076 }, { "epoch": 152.0, "eval_gen_len": 9.8, "eval_loss": 0.9361644983291626, "eval_rouge1": 0.0024, "eval_rouge2": 0.0009, "eval_rougeL": 0.0024, "eval_rougeLsum": 0.0024, "eval_runtime": 9.5387, "eval_samples_per_second": 11.532, "eval_steps_per_second": 1.468, "step": 2090 }, { "epoch": 152.95, "eval_gen_len": 9.8091, "eval_loss": 0.9287785887718201, "eval_rouge1": 0.0028, "eval_rouge2": 0.0011, "eval_rougeL": 0.0028, "eval_rougeLsum": 0.0028, "eval_runtime": 9.5888, "eval_samples_per_second": 11.472, "eval_steps_per_second": 1.46, "step": 2103 }, { "epoch": 153.96, "eval_gen_len": 9.9273, "eval_loss": 0.9205007553100586, "eval_rouge1": 0.0036, "eval_rouge2": 0.0013, "eval_rougeL": 0.0036, "eval_rougeLsum": 0.0036, "eval_runtime": 9.5824, "eval_samples_per_second": 11.479, "eval_steps_per_second": 1.461, "step": 2117 }, { "epoch": 154.98, "eval_gen_len": 10.1, "eval_loss": 0.9119426608085632, "eval_rouge1": 0.0037, "eval_rouge2": 0.0011, "eval_rougeL": 0.0038, "eval_rougeLsum": 0.0037, "eval_runtime": 9.5407, "eval_samples_per_second": 11.53, "eval_steps_per_second": 1.467, "step": 2131 }, { "epoch": 156.0, "eval_gen_len": 9.9364, "eval_loss": 0.9043306112289429, "eval_rouge1": 0.0034, "eval_rouge2": 0.0011, "eval_rougeL": 0.0034, "eval_rougeLsum": 0.0034, "eval_runtime": 9.5334, "eval_samples_per_second": 11.538, "eval_steps_per_second": 1.469, "step": 2145 }, { "epoch": 156.95, "eval_gen_len": 9.7818, "eval_loss": 0.8976907730102539, "eval_rouge1": 0.0033, "eval_rouge2": 0.0007, "eval_rougeL": 0.003, "eval_rougeLsum": 0.0031, "eval_runtime": 9.5568, "eval_samples_per_second": 11.51, "eval_steps_per_second": 1.465, "step": 2158 }, { "epoch": 157.96, "eval_gen_len": 9.7364, "eval_loss": 0.8908756971359253, "eval_rouge1": 0.0033, "eval_rouge2": 0.0007, "eval_rougeL": 0.003, "eval_rougeLsum": 0.0031, "eval_runtime": 9.5534, "eval_samples_per_second": 11.514, "eval_steps_per_second": 1.465, "step": 2172 }, { "epoch": 158.98, "eval_gen_len": 9.6273, "eval_loss": 0.8828199505805969, "eval_rouge1": 0.0017, "eval_rouge2": 0.0002, "eval_rougeL": 0.002, "eval_rougeLsum": 0.0019, "eval_runtime": 9.5064, "eval_samples_per_second": 11.571, "eval_steps_per_second": 1.473, "step": 2186 }, { "epoch": 160.0, "eval_gen_len": 9.8273, "eval_loss": 0.8748722076416016, "eval_rouge1": 0.0045, "eval_rouge2": 0.0015, "eval_rougeL": 0.0036, "eval_rougeLsum": 0.0036, "eval_runtime": 9.6814, "eval_samples_per_second": 11.362, "eval_steps_per_second": 1.446, "step": 2200 }, { "epoch": 160.95, "eval_gen_len": 9.9455, "eval_loss": 0.8685693740844727, "eval_rouge1": 0.0061, "eval_rouge2": 0.0022, "eval_rougeL": 0.0057, "eval_rougeLsum": 0.0057, "eval_runtime": 9.4971, "eval_samples_per_second": 11.583, "eval_steps_per_second": 1.474, "step": 2213 }, { "epoch": 161.96, "eval_gen_len": 9.9364, "eval_loss": 0.8622080087661743, "eval_rouge1": 0.0056, "eval_rouge2": 0.0017, "eval_rougeL": 0.005, "eval_rougeLsum": 0.0051, "eval_runtime": 9.5035, "eval_samples_per_second": 11.575, "eval_steps_per_second": 1.473, "step": 2227 }, { "epoch": 162.98, "eval_gen_len": 9.8636, "eval_loss": 0.8555266261100769, "eval_rouge1": 0.0049, "eval_rouge2": 0.0016, "eval_rougeL": 0.0046, "eval_rougeLsum": 0.0047, "eval_runtime": 9.5934, "eval_samples_per_second": 11.466, "eval_steps_per_second": 1.459, "step": 2241 }, { "epoch": 164.0, "eval_gen_len": 9.9455, "eval_loss": 0.8489392399787903, "eval_rouge1": 0.0065, "eval_rouge2": 0.0025, "eval_rougeL": 0.0062, "eval_rougeLsum": 0.0062, "eval_runtime": 9.588, "eval_samples_per_second": 11.473, "eval_steps_per_second": 1.46, "step": 2255 }, { "epoch": 164.95, "eval_gen_len": 10.1273, "eval_loss": 0.8434127569198608, "eval_rouge1": 0.0078, "eval_rouge2": 0.0022, "eval_rougeL": 0.0073, "eval_rougeLsum": 0.0073, "eval_runtime": 9.5692, "eval_samples_per_second": 11.495, "eval_steps_per_second": 1.463, "step": 2268 }, { "epoch": 165.96, "eval_gen_len": 10.1, "eval_loss": 0.8369239568710327, "eval_rouge1": 0.0072, "eval_rouge2": 0.0019, "eval_rougeL": 0.0068, "eval_rougeLsum": 0.0067, "eval_runtime": 9.5983, "eval_samples_per_second": 11.46, "eval_steps_per_second": 1.459, "step": 2282 }, { "epoch": 166.98, "eval_gen_len": 10.0636, "eval_loss": 0.8303181529045105, "eval_rouge1": 0.0068, "eval_rouge2": 0.0022, "eval_rougeL": 0.0062, "eval_rougeLsum": 0.0061, "eval_runtime": 9.5017, "eval_samples_per_second": 11.577, "eval_steps_per_second": 1.473, "step": 2296 }, { "epoch": 168.0, "eval_gen_len": 9.9, "eval_loss": 0.8243575096130371, "eval_rouge1": 0.0044, "eval_rouge2": 0.0013, "eval_rougeL": 0.0039, "eval_rougeLsum": 0.004, "eval_runtime": 9.5856, "eval_samples_per_second": 11.476, "eval_steps_per_second": 1.461, "step": 2310 }, { "epoch": 168.95, "eval_gen_len": 9.9818, "eval_loss": 0.8190615773200989, "eval_rouge1": 0.0063, "eval_rouge2": 0.0022, "eval_rougeL": 0.0058, "eval_rougeLsum": 0.0057, "eval_runtime": 9.5114, "eval_samples_per_second": 11.565, "eval_steps_per_second": 1.472, "step": 2323 }, { "epoch": 169.96, "eval_gen_len": 9.9818, "eval_loss": 0.8129807114601135, "eval_rouge1": 0.0054, "eval_rouge2": 0.0017, "eval_rougeL": 0.0047, "eval_rougeLsum": 0.0047, "eval_runtime": 9.625, "eval_samples_per_second": 11.429, "eval_steps_per_second": 1.455, "step": 2337 }, { "epoch": 170.98, "eval_gen_len": 9.9727, "eval_loss": 0.8074091672897339, "eval_rouge1": 0.0059, "eval_rouge2": 0.0017, "eval_rougeL": 0.0052, "eval_rougeLsum": 0.0052, "eval_runtime": 9.5111, "eval_samples_per_second": 11.565, "eval_steps_per_second": 1.472, "step": 2351 }, { "epoch": 172.0, "eval_gen_len": 9.9, "eval_loss": 0.8017935752868652, "eval_rouge1": 0.0045, "eval_rouge2": 0.0008, "eval_rougeL": 0.0038, "eval_rougeLsum": 0.0039, "eval_runtime": 9.5064, "eval_samples_per_second": 11.571, "eval_steps_per_second": 1.473, "step": 2365 }, { "epoch": 172.95, "eval_gen_len": 9.7091, "eval_loss": 0.7968164086341858, "eval_rouge1": 0.0031, "eval_rouge2": 0.0005, "eval_rougeL": 0.0027, "eval_rougeLsum": 0.0026, "eval_runtime": 9.5948, "eval_samples_per_second": 11.465, "eval_steps_per_second": 1.459, "step": 2378 }, { "epoch": 173.96, "eval_gen_len": 9.7364, "eval_loss": 0.7917037010192871, "eval_rouge1": 0.0025, "eval_rouge2": 0.0005, "eval_rougeL": 0.0022, "eval_rougeLsum": 0.0022, "eval_runtime": 9.636, "eval_samples_per_second": 11.416, "eval_steps_per_second": 1.453, "step": 2392 }, { "epoch": 174.98, "eval_gen_len": 9.7455, "eval_loss": 0.787342369556427, "eval_rouge1": 0.0033, "eval_rouge2": 0.0016, "eval_rougeL": 0.0027, "eval_rougeLsum": 0.0027, "eval_runtime": 9.5895, "eval_samples_per_second": 11.471, "eval_steps_per_second": 1.46, "step": 2406 }, { "epoch": 176.0, "eval_gen_len": 9.7, "eval_loss": 0.7829388976097107, "eval_rouge1": 0.0033, "eval_rouge2": 0.0016, "eval_rougeL": 0.0028, "eval_rougeLsum": 0.0028, "eval_runtime": 9.5727, "eval_samples_per_second": 11.491, "eval_steps_per_second": 1.462, "step": 2420 }, { "epoch": 176.95, "eval_gen_len": 9.6091, "eval_loss": 0.7783145904541016, "eval_rouge1": 0.0033, "eval_rouge2": 0.0016, "eval_rougeL": 0.0028, "eval_rougeLsum": 0.0028, "eval_runtime": 9.6033, "eval_samples_per_second": 11.454, "eval_steps_per_second": 1.458, "step": 2433 }, { "epoch": 177.96, "eval_gen_len": 9.7091, "eval_loss": 0.7735804319381714, "eval_rouge1": 0.0033, "eval_rouge2": 0.0016, "eval_rougeL": 0.0028, "eval_rougeLsum": 0.0028, "eval_runtime": 9.7017, "eval_samples_per_second": 11.338, "eval_steps_per_second": 1.443, "step": 2447 }, { "epoch": 178.98, "eval_gen_len": 9.6364, "eval_loss": 0.7691650986671448, "eval_rouge1": 0.0026, "eval_rouge2": 0.0016, "eval_rougeL": 0.0028, "eval_rougeLsum": 0.0028, "eval_runtime": 9.6163, "eval_samples_per_second": 11.439, "eval_steps_per_second": 1.456, "step": 2461 }, { "epoch": 180.0, "eval_gen_len": 9.4818, "eval_loss": 0.7652955055236816, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5613, "eval_samples_per_second": 11.505, "eval_steps_per_second": 1.464, "step": 2475 }, { "epoch": 180.95, "eval_gen_len": 9.4818, "eval_loss": 0.7611756920814514, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6858, "eval_samples_per_second": 11.357, "eval_steps_per_second": 1.445, "step": 2488 }, { "epoch": 181.82, "grad_norm": 1.818055272102356, "learning_rate": 2.9470085470085475e-06, "loss": 1.1581, "step": 2500 } ], "logging_steps": 500, "max_steps": 2925, "num_input_tokens_seen": 0, "num_train_epochs": 225, "save_steps": 500, "total_flos": 9.676695641731891e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }