{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.011886013134044512, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.943006567022257e-05, "eval_loss": 2.6626617908477783, "eval_runtime": 470.5912, "eval_samples_per_second": 15.056, "eval_steps_per_second": 7.529, "step": 1 }, { "epoch": 0.0005943006567022257, "grad_norm": 0.6062536239624023, "learning_rate": 0.0002, "loss": 2.4368, "step": 10 }, { "epoch": 0.0011886013134044513, "grad_norm": 0.45512208342552185, "learning_rate": 0.0002, "loss": 2.2546, "step": 20 }, { "epoch": 0.001782901970106677, "grad_norm": 0.49260321259498596, "learning_rate": 0.0002, "loss": 2.2256, "step": 30 }, { "epoch": 0.0023772026268089027, "grad_norm": 0.48497384786605835, "learning_rate": 0.0002, "loss": 2.2565, "step": 40 }, { "epoch": 0.002971503283511128, "grad_norm": 0.44261446595191956, "learning_rate": 0.0002, "loss": 2.2375, "step": 50 }, { "epoch": 0.002971503283511128, "eval_loss": 2.2133359909057617, "eval_runtime": 470.7348, "eval_samples_per_second": 15.051, "eval_steps_per_second": 7.527, "step": 50 }, { "epoch": 0.003565803940213354, "grad_norm": 0.46642744541168213, "learning_rate": 0.0002, "loss": 2.2245, "step": 60 }, { "epoch": 0.00416010459691558, "grad_norm": 0.46836933493614197, "learning_rate": 0.0002, "loss": 2.1641, "step": 70 }, { "epoch": 0.004754405253617805, "grad_norm": 0.5460557341575623, "learning_rate": 0.0002, "loss": 2.2276, "step": 80 }, { "epoch": 0.005348705910320031, "grad_norm": 0.5021308064460754, "learning_rate": 0.0002, "loss": 2.2139, "step": 90 }, { "epoch": 0.005943006567022256, "grad_norm": 0.5295082330703735, "learning_rate": 0.0002, "loss": 2.1527, "step": 100 }, { "epoch": 0.005943006567022256, "eval_loss": 2.18870210647583, "eval_runtime": 470.6047, "eval_samples_per_second": 15.055, "eval_steps_per_second": 7.529, "step": 100 }, { "epoch": 0.0065373072237244825, "grad_norm": 0.43450927734375, "learning_rate": 0.0002, "loss": 2.1958, "step": 110 }, { "epoch": 0.007131607880426708, "grad_norm": 0.5573306083679199, "learning_rate": 0.0002, "loss": 2.1506, "step": 120 }, { "epoch": 0.007725908537128933, "grad_norm": 0.7600811123847961, "learning_rate": 0.0002, "loss": 2.1196, "step": 130 }, { "epoch": 0.00832020919383116, "grad_norm": 0.449724942445755, "learning_rate": 0.0002, "loss": 2.2089, "step": 140 }, { "epoch": 0.008914509850533384, "grad_norm": 0.48421597480773926, "learning_rate": 0.0002, "loss": 2.216, "step": 150 }, { "epoch": 0.008914509850533384, "eval_loss": 2.1804769039154053, "eval_runtime": 470.7404, "eval_samples_per_second": 15.051, "eval_steps_per_second": 7.526, "step": 150 }, { "epoch": 0.00950881050723561, "grad_norm": 0.5594499707221985, "learning_rate": 0.0002, "loss": 2.1633, "step": 160 }, { "epoch": 0.010103111163937837, "grad_norm": 0.5630470514297485, "learning_rate": 0.0002, "loss": 2.2145, "step": 170 }, { "epoch": 0.010697411820640062, "grad_norm": 0.525554358959198, "learning_rate": 0.0002, "loss": 2.1271, "step": 180 }, { "epoch": 0.011291712477342288, "grad_norm": 0.5869236588478088, "learning_rate": 0.0002, "loss": 2.1515, "step": 190 }, { "epoch": 0.011886013134044512, "grad_norm": 0.5470851063728333, "learning_rate": 0.0002, "loss": 2.1862, "step": 200 }, { "epoch": 0.011886013134044512, "eval_loss": 2.172609567642212, "eval_runtime": 470.6463, "eval_samples_per_second": 15.054, "eval_steps_per_second": 7.528, "step": 200 } ], "logging_steps": 
10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.710785240209818e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }