{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 500, "global_step": 2070, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_gen_len": 74.18840579710145, "eval_loss": 0.04634086787700653, "eval_rouge1": 84.7175, "eval_rouge2": 84.1187, "eval_rougeL": 84.7778, "eval_rougeLsum": 84.4607, "eval_runtime": 11.3083, "eval_samples_per_second": 6.102, "eval_steps_per_second": 1.592, "step": 69 }, { "epoch": 2.0, "eval_gen_len": 70.0, "eval_loss": 0.03123296983540058, "eval_rouge1": 87.2197, "eval_rouge2": 86.9176, "eval_rougeL": 87.1927, "eval_rougeLsum": 87.1243, "eval_runtime": 10.965, "eval_samples_per_second": 6.293, "eval_steps_per_second": 1.642, "step": 138 }, { "epoch": 3.0, "eval_gen_len": 68.05797101449275, "eval_loss": 0.03574327379465103, "eval_rouge1": 87.3839, "eval_rouge2": 87.2143, "eval_rougeL": 87.4316, "eval_rougeLsum": 87.3834, "eval_runtime": 10.4263, "eval_samples_per_second": 6.618, "eval_steps_per_second": 1.726, "step": 207 }, { "epoch": 4.0, "eval_gen_len": 68.05797101449275, "eval_loss": 0.03339318931102753, "eval_rouge1": 87.8426, "eval_rouge2": 87.5124, "eval_rougeL": 87.8504, "eval_rougeLsum": 87.7767, "eval_runtime": 10.962, "eval_samples_per_second": 6.294, "eval_steps_per_second": 1.642, "step": 276 }, { "epoch": 5.0, "eval_gen_len": 65.85507246376811, "eval_loss": 0.03297444432973862, "eval_rouge1": 89.2541, "eval_rouge2": 88.8329, "eval_rougeL": 89.2476, "eval_rougeLsum": 89.1951, "eval_runtime": 10.415, "eval_samples_per_second": 6.625, "eval_steps_per_second": 1.728, "step": 345 }, { "epoch": 6.0, "eval_gen_len": 67.95652173913044, "eval_loss": 0.03524705395102501, "eval_rouge1": 89.8437, "eval_rouge2": 89.6094, "eval_rougeL": 90.0088, "eval_rougeLsum": 89.8354, "eval_runtime": 10.7899, "eval_samples_per_second": 6.395, "eval_steps_per_second": 1.668, "step": 414 }, { "epoch": 7.0, "eval_gen_len": 68.8840579710145, "eval_loss": 0.0350663997232914, "eval_rouge1": 87.6113, "eval_rouge2": 87.1275, "eval_rougeL": 87.5987, "eval_rougeLsum": 87.4656, "eval_runtime": 10.7938, "eval_samples_per_second": 6.393, "eval_steps_per_second": 1.668, "step": 483 }, { "epoch": 7.25, "learning_rate": 3.7922705314009665e-05, "loss": 0.0508, "step": 500 }, { "epoch": 8.0, "eval_gen_len": 64.92753623188406, "eval_loss": 0.034591469913721085, "eval_rouge1": 90.0332, "eval_rouge2": 89.523, "eval_rougeL": 89.93, "eval_rougeLsum": 89.9648, "eval_runtime": 10.6322, "eval_samples_per_second": 6.49, "eval_steps_per_second": 1.693, "step": 552 }, { "epoch": 9.0, "eval_gen_len": 60.21739130434783, "eval_loss": 0.03408576548099518, "eval_rouge1": 90.2056, "eval_rouge2": 89.7318, "eval_rougeL": 90.0764, "eval_rougeLsum": 90.1856, "eval_runtime": 10.1861, "eval_samples_per_second": 6.774, "eval_steps_per_second": 1.767, "step": 621 }, { "epoch": 10.0, "eval_gen_len": 62.492753623188406, "eval_loss": 0.04049117490649223, "eval_rouge1": 90.2441, "eval_rouge2": 89.7403, "eval_rougeL": 90.1241, "eval_rougeLsum": 90.1975, "eval_runtime": 10.0613, "eval_samples_per_second": 6.858, "eval_steps_per_second": 1.789, "step": 690 }, { "epoch": 11.0, "eval_gen_len": 62.6231884057971, "eval_loss": 0.0422113761305809, "eval_rouge1": 89.9563, "eval_rouge2": 89.3932, "eval_rougeL": 89.8517, "eval_rougeLsum": 89.919, "eval_runtime": 10.2103, "eval_samples_per_second": 6.758, "eval_steps_per_second": 1.763, "step": 759 }, { "epoch": 12.0, "eval_gen_len": 64.55072463768116, "eval_loss": 0.04623664915561676, "eval_rouge1": 88.9553, "eval_rouge2": 88.5149, "eval_rougeL": 88.8596, "eval_rougeLsum": 88.8863, "eval_runtime": 10.0663, "eval_samples_per_second": 6.855, "eval_steps_per_second": 1.788, "step": 828 }, { "epoch": 13.0, "eval_gen_len": 68.6086956521739, "eval_loss": 0.04621581733226776, "eval_rouge1": 88.3505, "eval_rouge2": 87.8014, "eval_rougeL": 88.2999, "eval_rougeLsum": 88.1348, "eval_runtime": 10.8311, "eval_samples_per_second": 6.371, "eval_steps_per_second": 1.662, "step": 897 }, { "epoch": 14.0, "eval_gen_len": 63.79710144927536, "eval_loss": 0.04529334604740143, "eval_rouge1": 89.2841, "eval_rouge2": 88.7915, "eval_rougeL": 89.0835, "eval_rougeLsum": 89.1838, "eval_runtime": 10.3712, "eval_samples_per_second": 6.653, "eval_steps_per_second": 1.736, "step": 966 }, { "epoch": 14.49, "learning_rate": 2.5845410628019323e-05, "loss": 0.0047, "step": 1000 }, { "epoch": 15.0, "eval_gen_len": 65.43478260869566, "eval_loss": 0.0474562793970108, "eval_rouge1": 89.207, "eval_rouge2": 88.8346, "eval_rougeL": 89.1459, "eval_rougeLsum": 89.1182, "eval_runtime": 10.3999, "eval_samples_per_second": 6.635, "eval_steps_per_second": 1.731, "step": 1035 }, { "epoch": 16.0, "eval_gen_len": 65.92753623188406, "eval_loss": 0.052605073899030685, "eval_rouge1": 89.7978, "eval_rouge2": 89.3703, "eval_rougeL": 89.7601, "eval_rougeLsum": 89.7866, "eval_runtime": 10.9419, "eval_samples_per_second": 6.306, "eval_steps_per_second": 1.645, "step": 1104 }, { "epoch": 17.0, "eval_gen_len": 66.40579710144928, "eval_loss": 0.051739003509283066, "eval_rouge1": 88.0891, "eval_rouge2": 87.7321, "eval_rougeL": 88.1064, "eval_rougeLsum": 88.0137, "eval_runtime": 10.4936, "eval_samples_per_second": 6.575, "eval_steps_per_second": 1.715, "step": 1173 }, { "epoch": 18.0, "eval_gen_len": 62.10144927536232, "eval_loss": 0.05028064176440239, "eval_rouge1": 90.3002, "eval_rouge2": 89.7609, "eval_rougeL": 90.1585, "eval_rougeLsum": 90.218, "eval_runtime": 10.1757, "eval_samples_per_second": 6.781, "eval_steps_per_second": 1.769, "step": 1242 }, { "epoch": 19.0, "eval_gen_len": 65.69565217391305, "eval_loss": 0.05453991889953613, "eval_rouge1": 88.9807, "eval_rouge2": 88.5391, "eval_rougeL": 88.8142, "eval_rougeLsum": 88.8417, "eval_runtime": 10.5092, "eval_samples_per_second": 6.566, "eval_steps_per_second": 1.713, "step": 1311 }, { "epoch": 20.0, "eval_gen_len": 65.17391304347827, "eval_loss": 0.05472288280725479, "eval_rouge1": 89.2547, "eval_rouge2": 88.8381, "eval_rougeL": 89.1517, "eval_rougeLsum": 89.158, "eval_runtime": 10.261, "eval_samples_per_second": 6.725, "eval_steps_per_second": 1.754, "step": 1380 }, { "epoch": 21.0, "eval_gen_len": 66.08695652173913, "eval_loss": 0.055956244468688965, "eval_rouge1": 88.2792, "eval_rouge2": 87.9155, "eval_rougeL": 88.2849, "eval_rougeLsum": 88.1559, "eval_runtime": 10.3755, "eval_samples_per_second": 6.65, "eval_steps_per_second": 1.735, "step": 1449 }, { "epoch": 21.74, "learning_rate": 1.3768115942028985e-05, "loss": 0.0019, "step": 1500 }, { "epoch": 22.0, "eval_gen_len": 66.40579710144928, "eval_loss": 0.057451289147138596, "eval_rouge1": 88.0891, "eval_rouge2": 87.7321, "eval_rougeL": 88.1064, "eval_rougeLsum": 88.0137, "eval_runtime": 10.8006, "eval_samples_per_second": 6.389, "eval_steps_per_second": 1.667, "step": 1518 }, { "epoch": 23.0, "eval_gen_len": 66.04347826086956, "eval_loss": 0.05758647620677948, "eval_rouge1": 87.7192, "eval_rouge2": 87.309, "eval_rougeL": 87.7299, "eval_rougeLsum": 87.5507, "eval_runtime": 10.8774, "eval_samples_per_second": 6.343, "eval_steps_per_second": 1.655, "step": 1587 }, { "epoch": 24.0, "eval_gen_len": 64.15942028985508, "eval_loss": 0.05580032616853714, "eval_rouge1": 89.0175, "eval_rouge2": 88.5301, "eval_rougeL": 88.8811, "eval_rougeLsum": 88.906, "eval_runtime": 10.4052, "eval_samples_per_second": 6.631, "eval_steps_per_second": 1.73, "step": 1656 }, { "epoch": 25.0, "eval_gen_len": 64.15942028985508, "eval_loss": 0.05610496550798416, "eval_rouge1": 89.0175, "eval_rouge2": 88.5301, "eval_rougeL": 88.8811, "eval_rougeLsum": 88.906, "eval_runtime": 10.2012, "eval_samples_per_second": 6.764, "eval_steps_per_second": 1.764, "step": 1725 }, { "epoch": 26.0, "eval_gen_len": 62.42028985507246, "eval_loss": 0.055899713188409805, "eval_rouge1": 90.1169, "eval_rouge2": 89.6101, "eval_rougeL": 89.9618, "eval_rougeLsum": 90.0139, "eval_runtime": 10.18, "eval_samples_per_second": 6.778, "eval_steps_per_second": 1.768, "step": 1794 }, { "epoch": 27.0, "eval_gen_len": 63.72463768115942, "eval_loss": 0.056867022067308426, "eval_rouge1": 89.1468, "eval_rouge2": 88.6354, "eval_rougeL": 89.0016, "eval_rougeLsum": 89.0138, "eval_runtime": 10.263, "eval_samples_per_second": 6.723, "eval_steps_per_second": 1.754, "step": 1863 }, { "epoch": 28.0, "eval_gen_len": 63.72463768115942, "eval_loss": 0.05622243136167526, "eval_rouge1": 89.1468, "eval_rouge2": 88.6354, "eval_rougeL": 89.0016, "eval_rougeLsum": 89.0138, "eval_runtime": 10.3037, "eval_samples_per_second": 6.697, "eval_steps_per_second": 1.747, "step": 1932 }, { "epoch": 28.99, "learning_rate": 1.6908212560386474e-06, "loss": 0.0013, "step": 2000 }, { "epoch": 29.0, "eval_gen_len": 63.72463768115942, "eval_loss": 0.05626355856657028, "eval_rouge1": 89.1468, "eval_rouge2": 88.6354, "eval_rougeL": 89.0016, "eval_rougeLsum": 89.0138, "eval_runtime": 10.3631, "eval_samples_per_second": 6.658, "eval_steps_per_second": 1.737, "step": 2001 }, { "epoch": 30.0, "eval_gen_len": 63.72463768115942, "eval_loss": 0.05641184747219086, "eval_rouge1": 89.1468, "eval_rouge2": 88.6354, "eval_rougeL": 89.0016, "eval_rougeLsum": 89.0138, "eval_runtime": 10.1065, "eval_samples_per_second": 6.827, "eval_steps_per_second": 1.781, "step": 2070 }, { "epoch": 30.0, "step": 2070, "total_flos": 1.0082653761024e+16, "train_loss": 0.0141942559161048, "train_runtime": 754.2364, "train_samples_per_second": 10.978, "train_steps_per_second": 2.744 } ], "logging_steps": 500, "max_steps": 2070, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 1.0082653761024e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }