{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 22944, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.996078431372549e-05, "loss": 1.0033, "step": 1529 }, { "epoch": 0.4, "learning_rate": 2.8894377451692577e-05, "loss": 0.8149, "step": 3058 }, { "epoch": 0.6, "learning_rate": 2.667296237105913e-05, "loss": 0.7863, "step": 4587 }, { "epoch": 0.8, "learning_rate": 2.4453000145285484e-05, "loss": 0.7733, "step": 6116 }, { "epoch": 1.0, "learning_rate": 2.223158506465204e-05, "loss": 0.7488, "step": 7645 }, { "epoch": 1.2, "learning_rate": 2.00116228388784e-05, "loss": 0.7485, "step": 9174 }, { "epoch": 1.4, "learning_rate": 1.779020775824495e-05, "loss": 0.7375, "step": 10703 }, { "epoch": 1.6, "learning_rate": 1.5570245532471306e-05, "loss": 0.7229, "step": 12232 }, { "epoch": 1.8, "learning_rate": 1.3348830451837863e-05, "loss": 0.721, "step": 13761 }, { "epoch": 2.0, "learning_rate": 1.1127415371204417e-05, "loss": 0.7253, "step": 15290 }, { "epoch": 2.2, "learning_rate": 8.907453145430772e-06, "loss": 0.7086, "step": 16819 }, { "epoch": 2.4, "learning_rate": 6.688943774516926e-06, "loss": 0.7119, "step": 18348 }, { "epoch": 2.6, "learning_rate": 4.467528693883481e-06, "loss": 0.7104, "step": 19877 }, { "epoch": 2.8, "learning_rate": 2.246113613250036e-06, "loss": 0.7096, "step": 21406 }, { "epoch": 3.0, "learning_rate": 2.7604242336190614e-08, "loss": 0.7066, "step": 22935 } ], "logging_steps": 1529, "max_steps": 22944, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.8652122334919393e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }