{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 7474, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1337971635001338, "grad_norm": 1.7339763641357422, "learning_rate": 5e-05, "loss": 1.0865, "step": 500 }, { "epoch": 0.2675943270002676, "grad_norm": 1.3081908226013184, "learning_rate": 4.64152566676226e-05, "loss": 0.4781, "step": 1000 }, { "epoch": 0.4013914905004014, "grad_norm": 0.8795633316040039, "learning_rate": 4.2830513335245196e-05, "loss": 0.4536, "step": 1500 }, { "epoch": 0.5351886540005352, "grad_norm": 1.046536922454834, "learning_rate": 3.92457700028678e-05, "loss": 0.4486, "step": 2000 }, { "epoch": 0.668985817500669, "grad_norm": 0.68308424949646, "learning_rate": 3.566102667049039e-05, "loss": 0.428, "step": 2500 }, { "epoch": 0.8027829810008028, "grad_norm": 0.7812120318412781, "learning_rate": 3.2076283338113e-05, "loss": 0.4242, "step": 3000 }, { "epoch": 0.9365801445009366, "grad_norm": 0.930639922618866, "learning_rate": 2.8491540005735588e-05, "loss": 0.4259, "step": 3500 }, { "epoch": 1.0703773080010703, "grad_norm": 0.7847789525985718, "learning_rate": 2.490679667335819e-05, "loss": 0.3887, "step": 4000 }, { "epoch": 1.2041744715012042, "grad_norm": 1.0552189350128174, "learning_rate": 2.1322053340980785e-05, "loss": 0.3637, "step": 4500 }, { "epoch": 1.337971635001338, "grad_norm": 0.9732643961906433, "learning_rate": 1.7737310008603386e-05, "loss": 0.3456, "step": 5000 }, { "epoch": 1.4717687985014718, "grad_norm": 0.905820906162262, "learning_rate": 1.4152566676225985e-05, "loss": 0.3446, "step": 5500 }, { "epoch": 1.6055659620016056, "grad_norm": 0.8462561964988708, "learning_rate": 1.056782334384858e-05, "loss": 0.3476, "step": 6000 }, { "epoch": 1.7393631255017392, "grad_norm": 1.2623038291931152, "learning_rate": 6.983080011471179e-06, "loss": 0.3453, "step": 6500 }, { "epoch": 1.873160289001873, "grad_norm": 1.0751724243164062, "learning_rate": 3.398336679093777e-06, "loss": 0.3434, "step": 7000 } ], "logging_steps": 500, "max_steps": 7474, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3880360580415488e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }