{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.10179597178797353, "eval_steps": 500, "global_step": 7000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00727114084199811, "grad_norm": 25.571533203125, "learning_rate": 1.9951525727720014e-05, "loss": 7.7581, "step": 500 }, { "epoch": 0.01454228168399622, "grad_norm": 12.20069694519043, "learning_rate": 1.990305145544003e-05, "loss": 6.872, "step": 1000 }, { "epoch": 0.021813422525994327, "grad_norm": 11.402491569519043, "learning_rate": 1.985457718316004e-05, "loss": 6.6344, "step": 1500 }, { "epoch": 0.02908456336799244, "grad_norm": 16.95390510559082, "learning_rate": 1.9806102910880053e-05, "loss": 6.4979, "step": 2000 }, { "epoch": 0.03635570420999055, "grad_norm": 13.161620140075684, "learning_rate": 1.9757628638600062e-05, "loss": 6.4306, "step": 2500 }, { "epoch": 0.043626845051988654, "grad_norm": 13.924323081970215, "learning_rate": 1.9709154366320078e-05, "loss": 6.3061, "step": 3000 }, { "epoch": 0.050897985893986766, "grad_norm": 20.163259506225586, "learning_rate": 1.966068009404009e-05, "loss": 6.2785, "step": 3500 }, { "epoch": 0.05816912673598488, "grad_norm": 13.455365180969238, "learning_rate": 1.9612205821760102e-05, "loss": 6.126, "step": 4000 }, { "epoch": 0.06544026757798299, "grad_norm": 14.471346855163574, "learning_rate": 1.9563731549480117e-05, "loss": 6.0571, "step": 4500 }, { "epoch": 0.0727114084199811, "grad_norm": 15.127618789672852, "learning_rate": 1.951525727720013e-05, "loss": 6.1144, "step": 5000 }, { "epoch": 0.0799825492619792, "grad_norm": 12.843366622924805, "learning_rate": 1.946678300492014e-05, "loss": 5.9973, "step": 5500 }, { "epoch": 0.08725369010397731, "grad_norm": 23.14200782775879, "learning_rate": 1.9418308732640154e-05, "loss": 5.9721, "step": 6000 }, { "epoch": 0.09452483094597543, "grad_norm": 24.403915405273438, "learning_rate": 1.9369834460360166e-05, "loss": 5.9525, "step": 6500 }, { "epoch": 0.10179597178797353, "grad_norm": 14.797924041748047, "learning_rate": 1.9321360188080178e-05, "loss": 5.8114, "step": 7000 } ], "logging_steps": 500, "max_steps": 206295, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1513105308502176.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }