{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7642338555598013, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07642338555598013, "grad_norm": 0.04852772876620293, "learning_rate": 0.00019991780772074993, "loss": 0.631, "step": 100 }, { "epoch": 0.15284677111196027, "grad_norm": 0.06019178777933121, "learning_rate": 0.00019926107993313918, "loss": 0.4967, "step": 200 }, { "epoch": 0.2292701566679404, "grad_norm": 0.06423385441303253, "learning_rate": 0.00019795194081958614, "loss": 0.4377, "step": 300 }, { "epoch": 0.30569354222392053, "grad_norm": 0.05076967179775238, "learning_rate": 0.00019599899493270587, "loss": 0.4121, "step": 400 }, { "epoch": 0.38211692777990064, "grad_norm": 0.06871291249990463, "learning_rate": 0.00019341507836108832, "loss": 0.3974, "step": 500 }, { "epoch": 0.4585403133358808, "grad_norm": 0.062317609786987305, "learning_rate": 0.00019021717436179406, "loss": 0.3826, "step": 600 }, { "epoch": 0.5349636988918609, "grad_norm": 0.054332610219717026, "learning_rate": 0.00018642630173483835, "loss": 0.3695, "step": 700 }, { "epoch": 0.6113870844478411, "grad_norm": 0.07528349757194519, "learning_rate": 0.00018206737667334231, "loss": 0.3589, "step": 800 }, { "epoch": 0.6878104700038211, "grad_norm": 0.07792173326015472, "learning_rate": 0.00017716904899736617, "loss": 0.3527, "step": 900 }, { "epoch": 0.7642338555598013, "grad_norm": 0.08550075441598892, "learning_rate": 0.00017176351384780628, "loss": 0.3407, "step": 1000 } ], "logging_steps": 100, "max_steps": 3924, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.00544163250176e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }