{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.905829596412556, "eval_steps": 500, "global_step": 1700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11210762331838565, "grad_norm": 2.0963618755340576, "learning_rate": 2.793296089385475e-05, "loss": 4.9671, "step": 100 }, { "epoch": 0.2242152466367713, "grad_norm": 0.3255109190940857, "learning_rate": 4.9408099688473526e-05, "loss": 4.7197, "step": 200 }, { "epoch": 0.336322869955157, "grad_norm": 1.1150901317596436, "learning_rate": 4.632398753894081e-05, "loss": 4.5727, "step": 300 }, { "epoch": 0.4484304932735426, "grad_norm": 4.738381862640381, "learning_rate": 4.320872274143302e-05, "loss": 4.5585, "step": 400 }, { "epoch": 0.5605381165919282, "grad_norm": 0.3568013906478882, "learning_rate": 4.0093457943925236e-05, "loss": 4.5399, "step": 500 }, { "epoch": 0.672645739910314, "grad_norm": 0.558647871017456, "learning_rate": 3.700934579439253e-05, "loss": 4.4905, "step": 600 }, { "epoch": 0.7847533632286996, "grad_norm": 19.64227867126465, "learning_rate": 3.3894080996884734e-05, "loss": 4.4371, "step": 700 }, { "epoch": 0.8968609865470852, "grad_norm": 2.4077465534210205, "learning_rate": 3.077881619937695e-05, "loss": 4.4867, "step": 800 }, { "epoch": 1.0089686098654709, "grad_norm": 1.3140878677368164, "learning_rate": 2.7663551401869157e-05, "loss": 4.4675, "step": 900 }, { "epoch": 1.1210762331838564, "grad_norm": 1.7104419469833374, "learning_rate": 2.4579439252336452e-05, "loss": 4.432, "step": 1000 }, { "epoch": 1.2331838565022422, "grad_norm": 7.763595104217529, "learning_rate": 2.1464174454828662e-05, "loss": 4.4185, "step": 1100 }, { "epoch": 1.3452914798206277, "grad_norm": 0.4148617088794708, "learning_rate": 1.8348909657320872e-05, "loss": 4.428, "step": 1200 }, { "epoch": 1.4573991031390134, "grad_norm": 2.4640185832977295, "learning_rate": 1.5233644859813085e-05, "loss": 4.4133, "step": 1300 }, { "epoch": 1.5695067264573992, "grad_norm": 0.4486638903617859, "learning_rate": 1.2118380062305297e-05, "loss": 4.3019, "step": 1400 }, { "epoch": 1.6816143497757847, "grad_norm": 191.92808532714844, "learning_rate": 9.003115264797508e-06, "loss": 4.4209, "step": 1500 }, { "epoch": 1.7937219730941703, "grad_norm": 2.8736190795898438, "learning_rate": 5.88785046728972e-06, "loss": 4.3696, "step": 1600 }, { "epoch": 1.905829596412556, "grad_norm": 0.13583791255950928, "learning_rate": 2.7725856697819316e-06, "loss": 4.3962, "step": 1700 } ], "logging_steps": 100, "max_steps": 1784, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }