{ "best_metric": null, "best_model_checkpoint": null, "epoch": 109.0909090909091, "eval_steps": 500, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.95, "eval_gen_len": 13.1091, "eval_loss": 21.40757179260254, "eval_rouge1": 0.0745, "eval_rouge2": 0.0159, "eval_rougeL": 0.0584, "eval_rougeLsum": 0.0582, "eval_runtime": 11.5492, "eval_samples_per_second": 9.524, "eval_steps_per_second": 1.212, "step": 13 }, { "epoch": 1.96, "eval_gen_len": 12.9636, "eval_loss": 21.254241943359375, "eval_rouge1": 0.0726, "eval_rouge2": 0.0153, "eval_rougeL": 0.0568, "eval_rougeLsum": 0.0569, "eval_runtime": 9.5424, "eval_samples_per_second": 11.528, "eval_steps_per_second": 1.467, "step": 27 }, { "epoch": 2.98, "eval_gen_len": 12.9636, "eval_loss": 21.031164169311523, "eval_rouge1": 0.0724, "eval_rouge2": 0.0176, "eval_rougeL": 0.0568, "eval_rougeLsum": 0.0569, "eval_runtime": 9.5313, "eval_samples_per_second": 11.541, "eval_steps_per_second": 1.469, "step": 41 }, { "epoch": 4.0, "eval_gen_len": 12.6727, "eval_loss": 20.743330001831055, "eval_rouge1": 0.0722, "eval_rouge2": 0.0175, "eval_rougeL": 0.055, "eval_rougeLsum": 0.0551, "eval_runtime": 9.5638, "eval_samples_per_second": 11.502, "eval_steps_per_second": 1.464, "step": 55 }, { "epoch": 4.95, "eval_gen_len": 12.5273, "eval_loss": 20.430522918701172, "eval_rouge1": 0.0708, "eval_rouge2": 0.0177, "eval_rougeL": 0.0545, "eval_rougeLsum": 0.0547, "eval_runtime": 9.583, "eval_samples_per_second": 11.479, "eval_steps_per_second": 1.461, "step": 68 }, { "epoch": 5.96, "eval_gen_len": 12.6091, "eval_loss": 20.10591697692871, "eval_rouge1": 0.0725, "eval_rouge2": 0.0185, "eval_rougeL": 0.0559, "eval_rougeLsum": 0.0559, "eval_runtime": 9.5892, "eval_samples_per_second": 11.471, "eval_steps_per_second": 1.46, "step": 82 }, { "epoch": 6.98, "eval_gen_len": 12.6091, "eval_loss": 19.850391387939453, "eval_rouge1": 0.0727, "eval_rouge2": 0.0169, "eval_rougeL": 0.0551, "eval_rougeLsum": 0.0547, "eval_runtime": 9.6928, "eval_samples_per_second": 11.349, "eval_steps_per_second": 1.444, "step": 96 }, { "epoch": 8.0, "eval_gen_len": 13.2909, "eval_loss": 19.628494262695312, "eval_rouge1": 0.0816, "eval_rouge2": 0.0228, "eval_rougeL": 0.062, "eval_rougeLsum": 0.0622, "eval_runtime": 9.7244, "eval_samples_per_second": 11.312, "eval_steps_per_second": 1.44, "step": 110 }, { "epoch": 8.95, "eval_gen_len": 13.2909, "eval_loss": 19.41258430480957, "eval_rouge1": 0.0811, "eval_rouge2": 0.0214, "eval_rougeL": 0.0613, "eval_rougeLsum": 0.0614, "eval_runtime": 9.5996, "eval_samples_per_second": 11.459, "eval_steps_per_second": 1.458, "step": 123 }, { "epoch": 9.96, "eval_gen_len": 13.5182, "eval_loss": 19.160032272338867, "eval_rouge1": 0.0818, "eval_rouge2": 0.0208, "eval_rougeL": 0.0632, "eval_rougeLsum": 0.0633, "eval_runtime": 9.5959, "eval_samples_per_second": 11.463, "eval_steps_per_second": 1.459, "step": 137 }, { "epoch": 10.98, "eval_gen_len": 13.8909, "eval_loss": 18.8905086517334, "eval_rouge1": 0.09, "eval_rouge2": 0.024, "eval_rougeL": 0.0698, "eval_rougeLsum": 0.0696, "eval_runtime": 9.6532, "eval_samples_per_second": 11.395, "eval_steps_per_second": 1.45, "step": 151 }, { "epoch": 12.0, "eval_gen_len": 14.1818, "eval_loss": 18.593591690063477, "eval_rouge1": 0.094, "eval_rouge2": 0.0324, "eval_rougeL": 0.0735, "eval_rougeLsum": 0.0732, "eval_runtime": 9.6893, "eval_samples_per_second": 11.353, "eval_steps_per_second": 1.445, "step": 165 }, { "epoch": 12.95, "eval_gen_len": 14.4364, "eval_loss": 18.286243438720703, "eval_rouge1": 0.0928, "eval_rouge2": 0.0329, "eval_rougeL": 0.0746, "eval_rougeLsum": 0.0749, "eval_runtime": 9.7163, "eval_samples_per_second": 11.321, "eval_steps_per_second": 1.441, "step": 178 }, { "epoch": 13.96, "eval_gen_len": 15.0727, "eval_loss": 17.896913528442383, "eval_rouge1": 0.096, "eval_rouge2": 0.0328, "eval_rougeL": 0.0788, "eval_rougeLsum": 0.0792, "eval_runtime": 9.6929, "eval_samples_per_second": 11.348, "eval_steps_per_second": 1.444, "step": 192 }, { "epoch": 14.98, "eval_gen_len": 15.7364, "eval_loss": 17.442358016967773, "eval_rouge1": 0.1015, "eval_rouge2": 0.0334, "eval_rougeL": 0.0816, "eval_rougeLsum": 0.0818, "eval_runtime": 9.6767, "eval_samples_per_second": 11.368, "eval_steps_per_second": 1.447, "step": 206 }, { "epoch": 16.0, "eval_gen_len": 16.2364, "eval_loss": 16.91552734375, "eval_rouge1": 0.1001, "eval_rouge2": 0.0337, "eval_rougeL": 0.0811, "eval_rougeLsum": 0.0814, "eval_runtime": 9.5645, "eval_samples_per_second": 11.501, "eval_steps_per_second": 1.464, "step": 220 }, { "epoch": 16.95, "eval_gen_len": 16.6818, "eval_loss": 16.372838973999023, "eval_rouge1": 0.102, "eval_rouge2": 0.0336, "eval_rougeL": 0.081, "eval_rougeLsum": 0.0809, "eval_runtime": 9.7488, "eval_samples_per_second": 11.283, "eval_steps_per_second": 1.436, "step": 233 }, { "epoch": 17.96, "eval_gen_len": 16.5364, "eval_loss": 15.727865219116211, "eval_rouge1": 0.0901, "eval_rouge2": 0.0301, "eval_rougeL": 0.0739, "eval_rougeLsum": 0.0741, "eval_runtime": 9.6417, "eval_samples_per_second": 11.409, "eval_steps_per_second": 1.452, "step": 247 }, { "epoch": 18.98, "eval_gen_len": 16.9818, "eval_loss": 14.980103492736816, "eval_rouge1": 0.0828, "eval_rouge2": 0.0258, "eval_rougeL": 0.0673, "eval_rougeLsum": 0.0671, "eval_runtime": 9.6417, "eval_samples_per_second": 11.409, "eval_steps_per_second": 1.452, "step": 261 }, { "epoch": 20.0, "eval_gen_len": 16.4727, "eval_loss": 14.11096477508545, "eval_rouge1": 0.0687, "eval_rouge2": 0.0146, "eval_rougeL": 0.0549, "eval_rougeLsum": 0.0547, "eval_runtime": 9.556, "eval_samples_per_second": 11.511, "eval_steps_per_second": 1.465, "step": 275 }, { "epoch": 20.95, "eval_gen_len": 14.2364, "eval_loss": 13.337847709655762, "eval_rouge1": 0.051, "eval_rouge2": 0.0102, "eval_rougeL": 0.0418, "eval_rougeLsum": 0.0414, "eval_runtime": 9.7157, "eval_samples_per_second": 11.322, "eval_steps_per_second": 1.441, "step": 288 }, { "epoch": 21.96, "eval_gen_len": 12.3818, "eval_loss": 12.585112571716309, "eval_rouge1": 0.0377, "eval_rouge2": 0.007, "eval_rougeL": 0.0317, "eval_rougeLsum": 0.0317, "eval_runtime": 9.5457, "eval_samples_per_second": 11.524, "eval_steps_per_second": 1.467, "step": 302 }, { "epoch": 22.98, "eval_gen_len": 12.0455, "eval_loss": 11.860977172851562, "eval_rouge1": 0.023, "eval_rouge2": 0.0048, "eval_rougeL": 0.0204, "eval_rougeLsum": 0.0204, "eval_runtime": 9.5524, "eval_samples_per_second": 11.515, "eval_steps_per_second": 1.466, "step": 316 }, { "epoch": 24.0, "eval_gen_len": 10.8545, "eval_loss": 11.155168533325195, "eval_rouge1": 0.012, "eval_rouge2": 0.001, "eval_rougeL": 0.0108, "eval_rougeLsum": 0.0108, "eval_runtime": 9.5834, "eval_samples_per_second": 11.478, "eval_steps_per_second": 1.461, "step": 330 }, { "epoch": 24.95, "eval_gen_len": 10.0273, "eval_loss": 10.512735366821289, "eval_rouge1": 0.0037, "eval_rouge2": 0.0005, "eval_rougeL": 0.0035, "eval_rougeLsum": 0.0036, "eval_runtime": 9.613, "eval_samples_per_second": 11.443, "eval_steps_per_second": 1.456, "step": 343 }, { "epoch": 25.96, "eval_gen_len": 11.6, "eval_loss": 9.83348560333252, "eval_rouge1": 0.0039, "eval_rouge2": 0.0002, "eval_rougeL": 0.0038, "eval_rougeLsum": 0.0039, "eval_runtime": 9.5253, "eval_samples_per_second": 11.548, "eval_steps_per_second": 1.47, "step": 357 }, { "epoch": 26.98, "eval_gen_len": 13.0455, "eval_loss": 9.216172218322754, "eval_rouge1": 0.0016, "eval_rouge2": 0.0, "eval_rougeL": 0.0016, "eval_rougeLsum": 0.0016, "eval_runtime": 9.5441, "eval_samples_per_second": 11.525, "eval_steps_per_second": 1.467, "step": 371 }, { "epoch": 28.0, "eval_gen_len": 14.6818, "eval_loss": 8.572382926940918, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.6018, "eval_samples_per_second": 11.456, "eval_steps_per_second": 1.458, "step": 385 }, { "epoch": 28.95, "eval_gen_len": 15.2727, "eval_loss": 8.037731170654297, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0009, "eval_rougeLsum": 0.0009, "eval_runtime": 9.5646, "eval_samples_per_second": 11.501, "eval_steps_per_second": 1.464, "step": 398 }, { "epoch": 29.96, "eval_gen_len": 16.3909, "eval_loss": 7.415070056915283, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.5548, "eval_samples_per_second": 11.513, "eval_steps_per_second": 1.465, "step": 412 }, { "epoch": 30.98, "eval_gen_len": 17.8364, "eval_loss": 6.802865028381348, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5629, "eval_samples_per_second": 11.503, "eval_steps_per_second": 1.464, "step": 426 }, { "epoch": 32.0, "eval_gen_len": 18.2818, "eval_loss": 6.211207866668701, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5163, "eval_samples_per_second": 11.559, "eval_steps_per_second": 1.471, "step": 440 }, { "epoch": 32.95, "eval_gen_len": 18.7091, "eval_loss": 5.717328071594238, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.505, "eval_samples_per_second": 11.573, "eval_steps_per_second": 1.473, "step": 453 }, { "epoch": 33.96, "eval_gen_len": 18.7091, "eval_loss": 5.17288064956665, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.5443, "eval_samples_per_second": 11.525, "eval_steps_per_second": 1.467, "step": 467 }, { "epoch": 34.98, "eval_gen_len": 19.0, "eval_loss": 4.669548034667969, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0008, "eval_runtime": 9.4845, "eval_samples_per_second": 11.598, "eval_steps_per_second": 1.476, "step": 481 }, { "epoch": 36.0, "eval_gen_len": 19.0, "eval_loss": 4.216309547424316, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.5515, "eval_samples_per_second": 11.516, "eval_steps_per_second": 1.466, "step": 495 }, { "epoch": 36.36, "grad_norm": 5.376431465148926, "learning_rate": 1.660854700854701e-05, "loss": 14.4939, "step": 500 }, { "epoch": 36.95, "eval_gen_len": 19.0, "eval_loss": 3.8450570106506348, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5449, "eval_samples_per_second": 11.525, "eval_steps_per_second": 1.467, "step": 508 }, { "epoch": 37.96, "eval_gen_len": 19.0, "eval_loss": 3.507812261581421, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.5902, "eval_samples_per_second": 11.47, "eval_steps_per_second": 1.46, "step": 522 }, { "epoch": 38.98, "eval_gen_len": 19.0, "eval_loss": 3.231505870819092, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5074, "eval_samples_per_second": 11.57, "eval_steps_per_second": 1.473, "step": 536 }, { "epoch": 40.0, "eval_gen_len": 19.0, "eval_loss": 2.9967288970947266, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5195, "eval_samples_per_second": 11.555, "eval_steps_per_second": 1.471, "step": 550 }, { "epoch": 40.95, "eval_gen_len": 18.9545, "eval_loss": 2.8089849948883057, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5224, "eval_samples_per_second": 11.552, "eval_steps_per_second": 1.47, "step": 563 }, { "epoch": 41.96, "eval_gen_len": 18.3727, "eval_loss": 2.6384663581848145, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.5129, "eval_samples_per_second": 11.563, "eval_steps_per_second": 1.472, "step": 577 }, { "epoch": 42.98, "eval_gen_len": 16.7273, "eval_loss": 2.492598295211792, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.5174, "eval_samples_per_second": 11.558, "eval_steps_per_second": 1.471, "step": 591 }, { "epoch": 44.0, "eval_gen_len": 11.9636, "eval_loss": 2.3677501678466797, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.5796, "eval_samples_per_second": 11.483, "eval_steps_per_second": 1.461, "step": 605 }, { "epoch": 44.95, "eval_gen_len": 9.2455, "eval_loss": 2.2777955532073975, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6685, "eval_samples_per_second": 11.377, "eval_steps_per_second": 1.448, "step": 618 }, { "epoch": 45.96, "eval_gen_len": 7.9455, "eval_loss": 2.198147773742676, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.585, "eval_samples_per_second": 11.476, "eval_steps_per_second": 1.461, "step": 632 }, { "epoch": 46.98, "eval_gen_len": 7.5909, "eval_loss": 2.1306562423706055, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6008, "eval_samples_per_second": 11.457, "eval_steps_per_second": 1.458, "step": 646 }, { "epoch": 48.0, "eval_gen_len": 7.4091, "eval_loss": 2.0773117542266846, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.6506, "eval_samples_per_second": 11.398, "eval_steps_per_second": 1.451, "step": 660 }, { "epoch": 48.95, "eval_gen_len": 7.2909, "eval_loss": 2.036808729171753, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5097, "eval_samples_per_second": 11.567, "eval_steps_per_second": 1.472, "step": 673 }, { "epoch": 49.96, "eval_gen_len": 6.8364, "eval_loss": 1.9949748516082764, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6031, "eval_samples_per_second": 11.455, "eval_steps_per_second": 1.458, "step": 687 }, { "epoch": 50.98, "eval_gen_len": 7.8273, "eval_loss": 1.957520842552185, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.5071, "eval_samples_per_second": 11.57, "eval_steps_per_second": 1.473, "step": 701 }, { "epoch": 52.0, "eval_gen_len": 7.5545, "eval_loss": 1.9219788312911987, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5836, "eval_samples_per_second": 11.478, "eval_steps_per_second": 1.461, "step": 715 }, { "epoch": 52.95, "eval_gen_len": 7.5364, "eval_loss": 1.8916202783584595, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6019, "eval_samples_per_second": 11.456, "eval_steps_per_second": 1.458, "step": 728 }, { "epoch": 53.96, "eval_gen_len": 7.1182, "eval_loss": 1.8674402236938477, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5024, "eval_samples_per_second": 11.576, "eval_steps_per_second": 1.473, "step": 742 }, { "epoch": 54.98, "eval_gen_len": 7.0364, "eval_loss": 1.846158742904663, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6139, "eval_samples_per_second": 11.442, "eval_steps_per_second": 1.456, "step": 756 }, { "epoch": 56.0, "eval_gen_len": 7.0, "eval_loss": 1.827086329460144, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.591, "eval_samples_per_second": 11.469, "eval_steps_per_second": 1.46, "step": 770 }, { "epoch": 56.95, "eval_gen_len": 7.5455, "eval_loss": 1.8088210821151733, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.572, "eval_samples_per_second": 11.492, "eval_steps_per_second": 1.463, "step": 783 }, { "epoch": 57.96, "eval_gen_len": 7.9, "eval_loss": 1.789602279663086, "eval_rouge1": 0.0001, "eval_rouge2": 0.0, "eval_rougeL": 0.0001, "eval_rougeLsum": 0.0001, "eval_runtime": 9.5837, "eval_samples_per_second": 11.478, "eval_steps_per_second": 1.461, "step": 797 }, { "epoch": 58.98, "eval_gen_len": 8.2545, "eval_loss": 1.7687013149261475, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.6152, "eval_samples_per_second": 11.44, "eval_steps_per_second": 1.456, "step": 811 }, { "epoch": 60.0, "eval_gen_len": 8.3636, "eval_loss": 1.7496564388275146, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4902, "eval_samples_per_second": 11.591, "eval_steps_per_second": 1.475, "step": 825 }, { "epoch": 60.95, "eval_gen_len": 9.1455, "eval_loss": 1.7332907915115356, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.5867, "eval_samples_per_second": 11.474, "eval_steps_per_second": 1.46, "step": 838 }, { "epoch": 61.96, "eval_gen_len": 8.9, "eval_loss": 1.7185932397842407, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4797, "eval_samples_per_second": 11.604, "eval_steps_per_second": 1.477, "step": 852 }, { "epoch": 62.98, "eval_gen_len": 9.7545, "eval_loss": 1.7047526836395264, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.618, "eval_samples_per_second": 11.437, "eval_steps_per_second": 1.456, "step": 866 }, { "epoch": 64.0, "eval_gen_len": 9.9818, "eval_loss": 1.6921414136886597, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6264, "eval_samples_per_second": 11.427, "eval_steps_per_second": 1.454, "step": 880 }, { "epoch": 64.95, "eval_gen_len": 9.6909, "eval_loss": 1.6816327571868896, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5134, "eval_samples_per_second": 11.563, "eval_steps_per_second": 1.472, "step": 893 }, { "epoch": 65.96, "eval_gen_len": 8.9545, "eval_loss": 1.6697723865509033, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5506, "eval_samples_per_second": 11.518, "eval_steps_per_second": 1.466, "step": 907 }, { "epoch": 66.98, "eval_gen_len": 9.6818, "eval_loss": 1.6568113565444946, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5388, "eval_samples_per_second": 11.532, "eval_steps_per_second": 1.468, "step": 921 }, { "epoch": 68.0, "eval_gen_len": 9.9455, "eval_loss": 1.6469463109970093, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4825, "eval_samples_per_second": 11.6, "eval_steps_per_second": 1.476, "step": 935 }, { "epoch": 68.95, "eval_gen_len": 9.3545, "eval_loss": 1.6408612728118896, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.5212, "eval_samples_per_second": 11.553, "eval_steps_per_second": 1.47, "step": 948 }, { "epoch": 69.96, "eval_gen_len": 9.1545, "eval_loss": 1.6316603422164917, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4986, "eval_samples_per_second": 11.581, "eval_steps_per_second": 1.474, "step": 962 }, { "epoch": 70.98, "eval_gen_len": 9.7818, "eval_loss": 1.623169183731079, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5276, "eval_samples_per_second": 11.545, "eval_steps_per_second": 1.469, "step": 976 }, { "epoch": 72.0, "eval_gen_len": 10.0273, "eval_loss": 1.6152759790420532, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5801, "eval_samples_per_second": 11.482, "eval_steps_per_second": 1.461, "step": 990 }, { "epoch": 72.73, "grad_norm": 3.344996213912964, "learning_rate": 1.3196581196581197e-05, "loss": 2.6089, "step": 1000 }, { "epoch": 72.95, "eval_gen_len": 9.1727, "eval_loss": 1.6071548461914062, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.6124, "eval_samples_per_second": 11.444, "eval_steps_per_second": 1.456, "step": 1003 }, { "epoch": 73.96, "eval_gen_len": 9.1545, "eval_loss": 1.599768042564392, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.498, "eval_samples_per_second": 11.581, "eval_steps_per_second": 1.474, "step": 1017 }, { "epoch": 74.98, "eval_gen_len": 9.5273, "eval_loss": 1.5934444665908813, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5664, "eval_samples_per_second": 11.499, "eval_steps_per_second": 1.463, "step": 1031 }, { "epoch": 76.0, "eval_gen_len": 10.4091, "eval_loss": 1.5867650508880615, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.579, "eval_samples_per_second": 11.483, "eval_steps_per_second": 1.462, "step": 1045 }, { "epoch": 76.95, "eval_gen_len": 10.3, "eval_loss": 1.5827070474624634, "eval_rouge1": 0.0005, "eval_rouge2": 0.0003, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.4875, "eval_samples_per_second": 11.594, "eval_steps_per_second": 1.476, "step": 1058 }, { "epoch": 77.96, "eval_gen_len": 9.6182, "eval_loss": 1.5748482942581177, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.4782, "eval_samples_per_second": 11.606, "eval_steps_per_second": 1.477, "step": 1072 }, { "epoch": 78.98, "eval_gen_len": 9.6273, "eval_loss": 1.5662298202514648, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.4778, "eval_samples_per_second": 11.606, "eval_steps_per_second": 1.477, "step": 1086 }, { "epoch": 80.0, "eval_gen_len": 9.5273, "eval_loss": 1.5585095882415771, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 9.4851, "eval_samples_per_second": 11.597, "eval_steps_per_second": 1.476, "step": 1100 }, { "epoch": 80.95, "eval_gen_len": 10.1364, "eval_loss": 1.5539778470993042, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6048, "eval_samples_per_second": 11.453, "eval_steps_per_second": 1.458, "step": 1113 }, { "epoch": 81.96, "eval_gen_len": 9.6182, "eval_loss": 1.541092038154602, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.4829, "eval_samples_per_second": 11.6, "eval_steps_per_second": 1.476, "step": 1127 }, { "epoch": 82.98, "eval_gen_len": 9.6091, "eval_loss": 1.5330486297607422, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5558, "eval_samples_per_second": 11.511, "eval_steps_per_second": 1.465, "step": 1141 }, { "epoch": 84.0, "eval_gen_len": 9.0818, "eval_loss": 1.5241070985794067, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6145, "eval_samples_per_second": 11.441, "eval_steps_per_second": 1.456, "step": 1155 }, { "epoch": 84.95, "eval_gen_len": 9.3, "eval_loss": 1.5192241668701172, "eval_rouge1": 0.0004, "eval_rouge2": 0.0, "eval_rougeL": 0.0004, "eval_rougeLsum": 0.0004, "eval_runtime": 9.4781, "eval_samples_per_second": 11.606, "eval_steps_per_second": 1.477, "step": 1168 }, { "epoch": 85.96, "eval_gen_len": 9.5364, "eval_loss": 1.5117179155349731, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.5291, "eval_samples_per_second": 11.544, "eval_steps_per_second": 1.469, "step": 1182 }, { "epoch": 86.98, "eval_gen_len": 9.4545, "eval_loss": 1.4990766048431396, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.6213, "eval_samples_per_second": 11.433, "eval_steps_per_second": 1.455, "step": 1196 }, { "epoch": 88.0, "eval_gen_len": 9.4182, "eval_loss": 1.4878661632537842, "eval_rouge1": 0.0011, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.4886, "eval_samples_per_second": 11.593, "eval_steps_per_second": 1.475, "step": 1210 }, { "epoch": 88.95, "eval_gen_len": 9.5727, "eval_loss": 1.479432463645935, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.5867, "eval_samples_per_second": 11.474, "eval_steps_per_second": 1.46, "step": 1223 }, { "epoch": 89.96, "eval_gen_len": 9.3909, "eval_loss": 1.471197247505188, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 9.6139, "eval_samples_per_second": 11.442, "eval_steps_per_second": 1.456, "step": 1237 }, { "epoch": 90.98, "eval_gen_len": 9.5, "eval_loss": 1.4649511575698853, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 9.5947, "eval_samples_per_second": 11.465, "eval_steps_per_second": 1.459, "step": 1251 }, { "epoch": 92.0, "eval_gen_len": 9.5273, "eval_loss": 1.4548052549362183, "eval_rouge1": 0.0016, "eval_rouge2": 0.0004, "eval_rougeL": 0.0016, "eval_rougeLsum": 0.0017, "eval_runtime": 9.6868, "eval_samples_per_second": 11.356, "eval_steps_per_second": 1.445, "step": 1265 }, { "epoch": 92.95, "eval_gen_len": 9.3727, "eval_loss": 1.444838285446167, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5071, "eval_samples_per_second": 11.57, "eval_steps_per_second": 1.473, "step": 1278 }, { "epoch": 93.96, "eval_gen_len": 9.6, "eval_loss": 1.4365838766098022, "eval_rouge1": 0.0014, "eval_rouge2": 0.0004, "eval_rougeL": 0.0014, "eval_rougeLsum": 0.0015, "eval_runtime": 9.5507, "eval_samples_per_second": 11.517, "eval_steps_per_second": 1.466, "step": 1292 }, { "epoch": 94.98, "eval_gen_len": 9.3364, "eval_loss": 1.4285393953323364, "eval_rouge1": 0.0006, "eval_rouge2": 0.0003, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.6323, "eval_samples_per_second": 11.42, "eval_steps_per_second": 1.453, "step": 1306 }, { "epoch": 96.0, "eval_gen_len": 9.3455, "eval_loss": 1.4242411851882935, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0008, "eval_runtime": 9.5163, "eval_samples_per_second": 11.559, "eval_steps_per_second": 1.471, "step": 1320 }, { "epoch": 96.95, "eval_gen_len": 9.4, "eval_loss": 1.4160754680633545, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.5868, "eval_samples_per_second": 11.474, "eval_steps_per_second": 1.46, "step": 1333 }, { "epoch": 97.96, "eval_gen_len": 9.4455, "eval_loss": 1.4052343368530273, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.6109, "eval_samples_per_second": 11.445, "eval_steps_per_second": 1.457, "step": 1347 }, { "epoch": 98.98, "eval_gen_len": 9.5273, "eval_loss": 1.3928413391113281, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.6591, "eval_samples_per_second": 11.388, "eval_steps_per_second": 1.449, "step": 1361 }, { "epoch": 100.0, "eval_gen_len": 9.5182, "eval_loss": 1.3840864896774292, "eval_rouge1": 0.0011, "eval_rouge2": 0.0, "eval_rougeL": 0.0011, "eval_rougeLsum": 0.0011, "eval_runtime": 9.6661, "eval_samples_per_second": 11.38, "eval_steps_per_second": 1.448, "step": 1375 }, { "epoch": 100.95, "eval_gen_len": 9.3, "eval_loss": 1.381872296333313, "eval_rouge1": 0.0006, "eval_rouge2": 0.0001, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 9.6513, "eval_samples_per_second": 11.397, "eval_steps_per_second": 1.451, "step": 1388 }, { "epoch": 101.96, "eval_gen_len": 9.3455, "eval_loss": 1.37588632106781, "eval_rouge1": 0.0004, "eval_rouge2": 0.0, "eval_rougeL": 0.0004, "eval_rougeLsum": 0.0004, "eval_runtime": 9.5607, "eval_samples_per_second": 11.505, "eval_steps_per_second": 1.464, "step": 1402 }, { "epoch": 102.98, "eval_gen_len": 9.2636, "eval_loss": 1.3675447702407837, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.5449, "eval_samples_per_second": 11.524, "eval_steps_per_second": 1.467, "step": 1416 }, { "epoch": 104.0, "eval_gen_len": 9.2455, "eval_loss": 1.358955979347229, "eval_rouge1": 0.0012, "eval_rouge2": 0.0, "eval_rougeL": 0.0012, "eval_rougeLsum": 0.0012, "eval_runtime": 9.5868, "eval_samples_per_second": 11.474, "eval_steps_per_second": 1.46, "step": 1430 }, { "epoch": 104.95, "eval_gen_len": 9.1455, "eval_loss": 1.3501193523406982, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 9.6684, "eval_samples_per_second": 11.377, "eval_steps_per_second": 1.448, "step": 1443 }, { "epoch": 105.96, "eval_gen_len": 9.1636, "eval_loss": 1.344258189201355, "eval_rouge1": 0.0007, "eval_rouge2": 0.0003, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 9.5388, "eval_samples_per_second": 11.532, "eval_steps_per_second": 1.468, "step": 1457 }, { "epoch": 106.98, "eval_gen_len": 9.8, "eval_loss": 1.3355817794799805, "eval_rouge1": 0.001, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.5263, "eval_samples_per_second": 11.547, "eval_steps_per_second": 1.47, "step": 1471 }, { "epoch": 108.0, "eval_gen_len": 9.9182, "eval_loss": 1.3305474519729614, "eval_rouge1": 0.0008, "eval_rouge2": 0.0005, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 9.6315, "eval_samples_per_second": 11.421, "eval_steps_per_second": 1.454, "step": 1485 }, { "epoch": 108.95, "eval_gen_len": 10.1636, "eval_loss": 1.3212946653366089, "eval_rouge1": 0.0028, "eval_rouge2": 0.001, "eval_rougeL": 0.0025, "eval_rougeLsum": 0.0026, "eval_runtime": 9.5797, "eval_samples_per_second": 11.483, "eval_steps_per_second": 1.461, "step": 1498 }, { "epoch": 109.09, "grad_norm": 5.480063438415527, "learning_rate": 9.784615384615387e-06, "loss": 1.7753, "step": 1500 } ], "logging_steps": 500, "max_steps": 2925, "num_input_tokens_seen": 0, "num_train_epochs": 225, "save_steps": 500, "total_flos": 5.805822083648717e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }