{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.001779061865097297,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 8.895309325486485e-05,
      "grad_norm": 0.5721328258514404,
      "learning_rate": 2.5e-05,
      "loss": 2.8022,
      "step": 25
    },
    {
      "epoch": 0.0001779061865097297,
      "grad_norm": 0.8923015594482422,
      "learning_rate": 2.5e-05,
      "loss": 2.7907,
      "step": 50
    },
    {
      "epoch": 0.0002668592797645945,
      "grad_norm": 0.4010128676891327,
      "learning_rate": 2.5e-05,
      "loss": 2.2071,
      "step": 75
    },
    {
      "epoch": 0.0003558123730194594,
      "grad_norm": 1.1923198699951172,
      "learning_rate": 2.5e-05,
      "loss": 2.0296,
      "step": 100
    },
    {
      "epoch": 0.00044476546627432423,
      "grad_norm": 0.44945457577705383,
      "learning_rate": 2.5e-05,
      "loss": 1.6782,
      "step": 125
    },
    {
      "epoch": 0.000533718559529189,
      "grad_norm": 1.238482117652893,
      "learning_rate": 2.5e-05,
      "loss": 1.3948,
      "step": 150
    },
    {
      "epoch": 0.0006226716527840539,
      "grad_norm": 1.2721318006515503,
      "learning_rate": 2.5e-05,
      "loss": 1.4995,
      "step": 175
    },
    {
      "epoch": 0.0007116247460389188,
      "grad_norm": 0.647321343421936,
      "learning_rate": 2.5e-05,
      "loss": 1.5624,
      "step": 200
    },
    {
      "epoch": 0.0008005778392937836,
      "grad_norm": 2.433274984359741,
      "learning_rate": 2.5e-05,
      "loss": 1.6035,
      "step": 225
    },
    {
      "epoch": 0.0008895309325486485,
      "grad_norm": 0.5926241278648376,
      "learning_rate": 2.5e-05,
      "loss": 1.4234,
      "step": 250
    },
    {
      "epoch": 0.0009784840258035134,
      "grad_norm": 1.6820074319839478,
      "learning_rate": 2.5e-05,
      "loss": 1.1803,
      "step": 275
    },
    {
      "epoch": 0.001067437119058378,
      "grad_norm": 1.5319762229919434,
      "learning_rate": 2.5e-05,
      "loss": 1.3767,
      "step": 300
    },
    {
      "epoch": 0.001156390212313243,
      "grad_norm": 0.5725632309913635,
      "learning_rate": 2.5e-05,
      "loss": 1.245,
      "step": 325
    },
    {
      "epoch": 0.0012453433055681078,
      "grad_norm": 0.7384375929832458,
      "learning_rate": 2.5e-05,
      "loss": 1.2259,
      "step": 350
    },
    {
      "epoch": 0.0013342963988229727,
      "grad_norm": 0.5739320516586304,
      "learning_rate": 2.5e-05,
      "loss": 1.2383,
      "step": 375
    },
    {
      "epoch": 0.0014232494920778376,
      "grad_norm": 0.962674617767334,
      "learning_rate": 2.5e-05,
      "loss": 1.2162,
      "step": 400
    },
    {
      "epoch": 0.0015122025853327023,
      "grad_norm": 1.2786195278167725,
      "learning_rate": 2.5e-05,
      "loss": 1.224,
      "step": 425
    },
    {
      "epoch": 0.0016011556785875672,
      "grad_norm": 1.3110697269439697,
      "learning_rate": 2.5e-05,
      "loss": 1.3172,
      "step": 450
    },
    {
      "epoch": 0.001690108771842432,
      "grad_norm": 0.6006544828414917,
      "learning_rate": 2.5e-05,
      "loss": 1.3636,
      "step": 475
    },
    {
      "epoch": 0.001779061865097297,
      "grad_norm": 0.6102003455162048,
      "learning_rate": 2.5e-05,
      "loss": 1.1117,
      "step": 500
    }
  ],
  "logging_steps": 25,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0200419401728e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}