{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 3860, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 0.0256, "eval_gen_len": 19.0, "eval_loss": 0.9372363090515137, "eval_runtime": 38.1578, "eval_samples_per_second": 20.206, "eval_steps_per_second": 1.284, "step": 386 }, { "epoch": 1.3, "learning_rate": 4.352331606217617e-05, "loss": 1.4765, "step": 500 }, { "epoch": 2.0, "eval_bleu": 0.0303, "eval_gen_len": 19.0, "eval_loss": 0.8219472169876099, "eval_runtime": 38.1767, "eval_samples_per_second": 20.196, "eval_steps_per_second": 1.284, "step": 772 }, { "epoch": 2.59, "learning_rate": 3.704663212435233e-05, "loss": 0.9443, "step": 1000 }, { "epoch": 3.0, "eval_bleu": 0.022, "eval_gen_len": 19.0, "eval_loss": 0.7708175778388977, "eval_runtime": 37.8173, "eval_samples_per_second": 20.388, "eval_steps_per_second": 1.296, "step": 1158 }, { "epoch": 3.89, "learning_rate": 3.05699481865285e-05, "loss": 0.8436, "step": 1500 }, { "epoch": 4.0, "eval_bleu": 0.0562, "eval_gen_len": 19.0, "eval_loss": 0.7287958860397339, "eval_runtime": 38.1538, "eval_samples_per_second": 20.208, "eval_steps_per_second": 1.284, "step": 1544 }, { "epoch": 5.0, "eval_bleu": 0.0469, "eval_gen_len": 19.0, "eval_loss": 0.7091419696807861, "eval_runtime": 37.9015, "eval_samples_per_second": 20.342, "eval_steps_per_second": 1.293, "step": 1930 }, { "epoch": 5.18, "learning_rate": 2.4093264248704665e-05, "loss": 0.7916, "step": 2000 }, { "epoch": 6.0, "eval_bleu": 0.0464, "eval_gen_len": 19.0, "eval_loss": 0.6926471590995789, "eval_runtime": 38.039, "eval_samples_per_second": 20.269, "eval_steps_per_second": 1.288, "step": 2316 }, { "epoch": 6.48, "learning_rate": 1.761658031088083e-05, "loss": 0.7568, "step": 2500 }, { "epoch": 7.0, "eval_bleu": 0.0536, "eval_gen_len": 19.0, "eval_loss": 0.6799036860466003, "eval_runtime": 38.099, "eval_samples_per_second": 20.237, "eval_steps_per_second": 1.286, "step": 2702 }, { "epoch": 7.77, "learning_rate": 1.1139896373056995e-05, "loss": 0.7403, "step": 3000 }, { "epoch": 8.0, "eval_bleu": 0.0547, "eval_gen_len": 19.0, "eval_loss": 0.6737177968025208, "eval_runtime": 37.9859, "eval_samples_per_second": 20.297, "eval_steps_per_second": 1.29, "step": 3088 }, { "epoch": 9.0, "eval_bleu": 0.0532, "eval_gen_len": 19.0, "eval_loss": 0.6666902303695679, "eval_runtime": 38.1676, "eval_samples_per_second": 20.2, "eval_steps_per_second": 1.284, "step": 3474 }, { "epoch": 9.07, "learning_rate": 4.663212435233161e-06, "loss": 0.7215, "step": 3500 }, { "epoch": 10.0, "eval_bleu": 0.056, "eval_gen_len": 19.0, "eval_loss": 0.6674807667732239, "eval_runtime": 38.091, "eval_samples_per_second": 20.241, "eval_steps_per_second": 1.286, "step": 3860 }, { "epoch": 10.0, "step": 3860, "total_flos": 2.192693390180352e+16, "train_loss": 0.8798027631532342, "train_runtime": 2043.5708, "train_samples_per_second": 30.173, "train_steps_per_second": 1.889 } ], "max_steps": 3860, "num_train_epochs": 10, "total_flos": 2.192693390180352e+16, "trial_name": null, "trial_params": null }