{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "eval_steps": 500, "global_step": 3936, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_gen_len": 15.577319587628866, "eval_loss": 8.096635818481445, "eval_rouge1": 16.5418, "eval_rouge2": 10.3523, "eval_rougeL": 13.972, "eval_rougeLsum": 14.1918, "eval_runtime": 157.1024, "eval_samples_per_second": 1.852, "eval_steps_per_second": 0.236, "step": 328 }, { "epoch": 1.52, "learning_rate": 1.8394004282655248e-05, "loss": 18.3143, "step": 500 }, { "epoch": 2.0, "eval_gen_len": 8.896907216494846, "eval_loss": 0.9259825944900513, "eval_rouge1": 31.5806, "eval_rouge2": 27.0287, "eval_rougeL": 29.6916, "eval_rougeLsum": 30.0327, "eval_runtime": 155.4292, "eval_samples_per_second": 1.872, "eval_steps_per_second": 0.238, "step": 656 }, { "epoch": 3.0, "eval_gen_len": 13.807560137457045, "eval_loss": 0.7708158493041992, "eval_rouge1": 22.6847, "eval_rouge2": 15.805, "eval_rougeL": 19.6336, "eval_rougeLsum": 19.8945, "eval_runtime": 155.3127, "eval_samples_per_second": 1.874, "eval_steps_per_second": 0.238, "step": 984 }, { "epoch": 3.05, "learning_rate": 1.5717344753747325e-05, "loss": 1.0739, "step": 1000 }, { "epoch": 4.0, "eval_gen_len": 9.618556701030927, "eval_loss": 0.7307576537132263, "eval_rouge1": 35.1675, "eval_rouge2": 27.3998, "eval_rougeL": 31.8527, "eval_rougeLsum": 32.0356, "eval_runtime": 133.4505, "eval_samples_per_second": 2.181, "eval_steps_per_second": 0.277, "step": 1312 }, { "epoch": 4.57, "learning_rate": 1.3040685224839403e-05, "loss": 0.8085, "step": 1500 }, { "epoch": 5.0, "eval_gen_len": 10.116838487972508, "eval_loss": 0.708371102809906, "eval_rouge1": 34.4346, "eval_rouge2": 26.202, "eval_rougeL": 30.8999, "eval_rougeLsum": 31.212, "eval_runtime": 139.0571, "eval_samples_per_second": 2.093, "eval_steps_per_second": 0.266, "step": 1640 }, { "epoch": 6.0, "eval_gen_len": 10.268041237113403, "eval_loss": 0.6923775672912598, "eval_rouge1": 34.3345, "eval_rouge2": 26.0144, "eval_rougeL": 30.692, "eval_rougeLsum": 31.0384, "eval_runtime": 141.1422, "eval_samples_per_second": 2.062, "eval_steps_per_second": 0.262, "step": 1968 }, { "epoch": 6.1, "learning_rate": 1.036402569593148e-05, "loss": 0.7597, "step": 2000 }, { "epoch": 7.0, "eval_gen_len": 10.31958762886598, "eval_loss": 0.6812536716461182, "eval_rouge1": 34.3854, "eval_rouge2": 26.0495, "eval_rougeL": 30.8335, "eval_rougeLsum": 31.1696, "eval_runtime": 143.6068, "eval_samples_per_second": 2.026, "eval_steps_per_second": 0.258, "step": 2296 }, { "epoch": 7.62, "learning_rate": 7.687366167023556e-06, "loss": 0.7442, "step": 2500 }, { "epoch": 8.0, "eval_gen_len": 10.360824742268042, "eval_loss": 0.6728662252426147, "eval_rouge1": 34.3758, "eval_rouge2": 26.0079, "eval_rougeL": 30.7863, "eval_rougeLsum": 31.1239, "eval_runtime": 142.7676, "eval_samples_per_second": 2.038, "eval_steps_per_second": 0.259, "step": 2624 }, { "epoch": 9.0, "eval_gen_len": 10.49828178694158, "eval_loss": 0.6669635772705078, "eval_rouge1": 34.2115, "eval_rouge2": 25.7443, "eval_rougeL": 30.5369, "eval_rougeLsum": 30.9282, "eval_runtime": 145.2485, "eval_samples_per_second": 2.003, "eval_steps_per_second": 0.255, "step": 2952 }, { "epoch": 9.15, "learning_rate": 5.010706638115633e-06, "loss": 0.7252, "step": 3000 }, { "epoch": 10.0, "eval_gen_len": 10.529209621993127, "eval_loss": 0.6624875664710999, "eval_rouge1": 34.2518, "eval_rouge2": 25.7147, "eval_rougeL": 30.5433, "eval_rougeLsum": 30.9116, "eval_runtime": 146.9477, "eval_samples_per_second": 1.98, "eval_steps_per_second": 0.252, "step": 3280 }, { "epoch": 10.67, "learning_rate": 2.334047109207709e-06, "loss": 0.7168, "step": 3500 }, { "epoch": 11.0, "eval_gen_len": 10.618556701030927, "eval_loss": 0.6601226925849915, "eval_rouge1": 34.0539, "eval_rouge2": 25.5073, "eval_rougeL": 30.329, "eval_rougeLsum": 30.6828, "eval_runtime": 146.8209, "eval_samples_per_second": 1.982, "eval_steps_per_second": 0.252, "step": 3608 }, { "epoch": 12.0, "eval_gen_len": 10.532646048109966, "eval_loss": 0.6593531370162964, "eval_rouge1": 34.2696, "eval_rouge2": 25.7973, "eval_rougeL": 30.5609, "eval_rougeLsum": 30.9651, "eval_runtime": 145.5529, "eval_samples_per_second": 1.999, "eval_steps_per_second": 0.254, "step": 3936 } ], "logging_steps": 500, "max_steps": 3936, "num_train_epochs": 12, "save_steps": 500, "total_flos": 2.949770616687821e+16, "trial_name": null, "trial_params": null }