{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3858, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38880248833592534, "grad_norm": 0.6042742133140564, "learning_rate": 1.740798341109383e-05, "loss": 0.2151, "step": 500 }, { "epoch": 0.38880248833592534, "eval_accuracy": 0.9762738234150136, "eval_loss": 0.10295161604881287, "eval_runtime": 26.8587, "eval_samples_per_second": 95.723, "eval_steps_per_second": 11.989, "step": 500 }, { "epoch": 0.7776049766718507, "grad_norm": 0.04766521230340004, "learning_rate": 1.4815966822187664e-05, "loss": 0.1167, "step": 1000 }, { "epoch": 0.7776049766718507, "eval_accuracy": 0.9875534811357448, "eval_loss": 0.05390123650431633, "eval_runtime": 26.735, "eval_samples_per_second": 96.166, "eval_steps_per_second": 12.044, "step": 1000 }, { "epoch": 1.166407465007776, "grad_norm": 0.008871573023498058, "learning_rate": 1.2223950233281495e-05, "loss": 0.0687, "step": 1500 }, { "epoch": 1.166407465007776, "eval_accuracy": 0.9793854531310774, "eval_loss": 0.12513048946857452, "eval_runtime": 27.1074, "eval_samples_per_second": 94.845, "eval_steps_per_second": 11.879, "step": 1500 }, { "epoch": 1.5552099533437014, "grad_norm": 0.0014064661227166653, "learning_rate": 9.631933644375326e-06, "loss": 0.0279, "step": 2000 }, { "epoch": 1.5552099533437014, "eval_accuracy": 0.9778296382730455, "eval_loss": 0.148821160197258, "eval_runtime": 26.879, "eval_samples_per_second": 95.651, "eval_steps_per_second": 11.98, "step": 2000 }, { "epoch": 1.9440124416796267, "grad_norm": 0.002627410925924778, "learning_rate": 7.039917055469155e-06, "loss": 0.0293, "step": 2500 }, { "epoch": 1.9440124416796267, "eval_accuracy": 0.9914430182808246, "eval_loss": 0.04895803704857826, "eval_runtime": 26.8775, "eval_samples_per_second": 95.656, "eval_steps_per_second": 11.98, "step": 2500 }, { "epoch": 2.332814930015552, "grad_norm": 0.0008352847071364522, "learning_rate": 4.447900466562986e-06, "loss": 0.0119, "step": 3000 }, { "epoch": 2.332814930015552, "eval_accuracy": 0.9766627771295215, "eval_loss": 0.15932457149028778, "eval_runtime": 27.4463, "eval_samples_per_second": 93.674, "eval_steps_per_second": 11.732, "step": 3000 }, { "epoch": 2.721617418351477, "grad_norm": 0.006800688803195953, "learning_rate": 1.8558838776568172e-06, "loss": 0.0045, "step": 3500 }, { "epoch": 2.721617418351477, "eval_accuracy": 0.9797744068455854, "eval_loss": 0.14682677388191223, "eval_runtime": 27.6918, "eval_samples_per_second": 92.843, "eval_steps_per_second": 11.628, "step": 3500 } ], "logging_steps": 500, "max_steps": 3858, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 5863351757603040.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }