{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 604, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.033112582781456956, "grad_norm": 2.8630754947662354, "learning_rate": 8.264462809917356e-07, "loss": 2.5, "step": 20 }, { "epoch": 0.06622516556291391, "grad_norm": 2.044926643371582, "learning_rate": 1.6528925619834712e-06, "loss": 2.5625, "step": 40 }, { "epoch": 0.09933774834437085, "grad_norm": 2.0647263526916504, "learning_rate": 2.479338842975207e-06, "loss": 2.475, "step": 60 }, { "epoch": 0.13245033112582782, "grad_norm": 1.9722806215286255, "learning_rate": 3.3057851239669424e-06, "loss": 2.375, "step": 80 }, { "epoch": 0.16556291390728478, "grad_norm": 1.7632310390472412, "learning_rate": 4.132231404958678e-06, "loss": 2.225, "step": 100 }, { "epoch": 0.1986754966887417, "grad_norm": 2.126565456390381, "learning_rate": 4.958677685950414e-06, "loss": 1.9, "step": 120 }, { "epoch": 0.23178807947019867, "grad_norm": 1.3362902402877808, "learning_rate": 4.980933547537104e-06, "loss": 1.4625, "step": 140 }, { "epoch": 0.26490066225165565, "grad_norm": 1.6563653945922852, "learning_rate": 4.919995460276783e-06, "loss": 1.25, "step": 160 }, { "epoch": 0.2980132450331126, "grad_norm": 1.2334599494934082, "learning_rate": 4.81816262909214e-06, "loss": 1.0375, "step": 180 }, { "epoch": 0.33112582781456956, "grad_norm": 1.2277352809906006, "learning_rate": 4.677155895043723e-06, "loss": 1.05, "step": 200 }, { "epoch": 0.36423841059602646, "grad_norm": 1.204990267753601, "learning_rate": 4.499358086684381e-06, "loss": 0.975, "step": 220 }, { "epoch": 0.3973509933774834, "grad_norm": 1.0835106372833252, "learning_rate": 4.287773753387249e-06, "loss": 0.8875, "step": 240 }, { "epoch": 0.4304635761589404, "grad_norm": 1.209863305091858, "learning_rate": 4.045978392408671e-06, "loss": 0.8938, "step": 260 }, { "epoch": 0.46357615894039733, "grad_norm": 1.1028498411178589, "learning_rate": 3.778058027682004e-06, "loss": 0.9125, "step": 280 }, { "epoch": 0.4966887417218543, "grad_norm": 1.186710238456726, "learning_rate": 3.488540161381304e-06, "loss": 0.8625, "step": 300 }, { "epoch": 0.5298013245033113, "grad_norm": 1.2400007247924805, "learning_rate": 3.18231726508275e-06, "loss": 0.8812, "step": 320 }, { "epoch": 0.5629139072847682, "grad_norm": 1.3776289224624634, "learning_rate": 2.8645641034226584e-06, "loss": 0.7625, "step": 340 }, { "epoch": 0.5960264900662252, "grad_norm": 1.7026677131652832, "learning_rate": 2.5406502873736693e-06, "loss": 0.8313, "step": 360 }, { "epoch": 0.6291390728476821, "grad_norm": 1.2438005208969116, "learning_rate": 2.2160495348738127e-06, "loss": 0.8187, "step": 380 }, { "epoch": 0.6622516556291391, "grad_norm": 1.2019314765930176, "learning_rate": 1.8962471721846555e-06, "loss": 0.825, "step": 400 }, { "epoch": 0.695364238410596, "grad_norm": 1.3416401147842407, "learning_rate": 1.5866474390840126e-06, "loss": 0.7875, "step": 420 }, { "epoch": 0.7284768211920529, "grad_norm": 1.214216709136963, "learning_rate": 1.2924821643137226e-06, "loss": 0.7625, "step": 440 }, { "epoch": 0.7615894039735099, "grad_norm": 2.329277753829956, "learning_rate": 1.018722354547402e-06, "loss": 0.8125, "step": 460 }, { "epoch": 0.7947019867549668, "grad_norm": 1.8247755765914917, "learning_rate": 7.69994190908499e-07, "loss": 0.7844, "step": 480 }, { "epoch": 0.8278145695364238, "grad_norm": 1.354269027709961, "learning_rate": 5.505008525871183e-07, "loss": 0.8313, "step": 500 }, { "epoch": 0.8609271523178808, "grad_norm": 1.941784143447876, "learning_rate": 3.639514886337786e-07, "loss": 0.7844, "step": 520 }, { "epoch": 0.8940397350993378, "grad_norm": 1.4848183393478394, "learning_rate": 2.1349853821348797e-07, "loss": 0.8125, "step": 540 }, { "epoch": 0.9271523178807947, "grad_norm": 1.269209384918213, "learning_rate": 1.0168445852548142e-07, "loss": 0.8187, "step": 560 }, { "epoch": 0.9602649006622517, "grad_norm": 1.4907937049865723, "learning_rate": 3.0398760616796306e-08, "loss": 0.8313, "step": 580 }, { "epoch": 0.9933774834437086, "grad_norm": 1.2604339122772217, "learning_rate": 8.460791279910064e-10, "loss": 0.7156, "step": 600 }, { "epoch": 1.0, "step": 604, "total_flos": 3210595501381632.0, "train_loss": 1.177695571192053, "train_runtime": 189.533, "train_samples_per_second": 12.742, "train_steps_per_second": 3.187 } ], "logging_steps": 20, "max_steps": 604, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3210595501381632.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }