{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 2624, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_gen_len": 10.017182130584192, "eval_loss": 0.6921027898788452, "eval_rouge1": 34.9112, "eval_rouge2": 26.7503, "eval_rougeL": 31.4124, "eval_rougeLsum": 31.7295, "eval_runtime": 139.485, "eval_samples_per_second": 2.086, "eval_steps_per_second": 0.265, "step": 328 }, { "epoch": 1.52, "learning_rate": 0.0003125, "loss": 6.8746, "step": 500 }, { "epoch": 2.0, "eval_gen_len": 10.845360824742269, "eval_loss": 0.6025161147117615, "eval_rouge1": 33.9134, "eval_rouge2": 25.3236, "eval_rougeL": 30.1968, "eval_rougeLsum": 30.472, "eval_runtime": 151.0112, "eval_samples_per_second": 1.927, "eval_steps_per_second": 0.245, "step": 656 }, { "epoch": 3.0, "eval_gen_len": 11.872852233676976, "eval_loss": 0.5686607956886292, "eval_rouge1": 31.6178, "eval_rouge2": 22.9463, "eval_rougeL": 27.8758, "eval_rougeLsum": 28.3572, "eval_runtime": 165.9096, "eval_samples_per_second": 1.754, "eval_steps_per_second": 0.223, "step": 984 }, { "epoch": 3.05, "learning_rate": 0.0004853133981334999, "loss": 0.6462, "step": 1000 }, { "epoch": 4.0, "eval_gen_len": 12.130584192439862, "eval_loss": 0.5354723334312439, "eval_rouge1": 30.8157, "eval_rouge2": 22.1783, "eval_rougeL": 27.1641, "eval_rougeLsum": 27.569, "eval_runtime": 160.3562, "eval_samples_per_second": 1.815, "eval_steps_per_second": 0.231, "step": 1312 }, { "epoch": 4.57, "learning_rate": 0.00033927030930209996, "loss": 0.5618, "step": 1500 }, { "epoch": 5.0, "eval_gen_len": 12.50171821305842, "eval_loss": 0.5159749388694763, "eval_rouge1": 29.9183, "eval_rouge2": 21.0842, "eval_rougeL": 26.1671, "eval_rougeLsum": 26.5965, "eval_runtime": 158.4069, "eval_samples_per_second": 1.837, "eval_steps_per_second": 0.234, "step": 1640 }, { "epoch": 6.0, "eval_gen_len": 12.508591065292096, "eval_loss": 0.5024679899215698, "eval_rouge1": 29.7823, "eval_rouge2": 21.1443, "eval_rougeL": 26.0286, "eval_rougeLsum": 26.5215, "eval_runtime": 154.9067, "eval_samples_per_second": 1.879, "eval_steps_per_second": 0.239, "step": 1968 }, { "epoch": 6.1, "learning_rate": 0.0001310131517407316, "loss": 0.498, "step": 2000 }, { "epoch": 7.0, "eval_gen_len": 12.896907216494846, "eval_loss": 0.4978101849555969, "eval_rouge1": 29.1043, "eval_rouge2": 20.2391, "eval_rougeL": 25.3347, "eval_rougeLsum": 25.804, "eval_runtime": 152.5447, "eval_samples_per_second": 1.908, "eval_steps_per_second": 0.243, "step": 2296 }, { "epoch": 7.62, "learning_rate": 5.680048251060194e-06, "loss": 0.4551, "step": 2500 }, { "epoch": 8.0, "eval_gen_len": 12.738831615120274, "eval_loss": 0.497775673866272, "eval_rouge1": 29.5362, "eval_rouge2": 20.6621, "eval_rougeL": 25.7689, "eval_rougeLsum": 26.2351, "eval_runtime": 159.8028, "eval_samples_per_second": 1.821, "eval_steps_per_second": 0.232, "step": 2624 } ], "logging_steps": 500, "max_steps": 2624, "num_train_epochs": 8, "save_steps": 500, "total_flos": 1.966513744458547e+16, "trial_name": null, "trial_params": null }