{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.01010611419909045, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001010611419909045, "grad_norm": 0.8601729869842529, "learning_rate": 2e-05, "loss": 43.4108, "step": 1 }, { "epoch": 0.001010611419909045, "eval_loss": 10.859463691711426, "eval_runtime": 1.1281, "eval_samples_per_second": 369.645, "eval_steps_per_second": 185.266, "step": 1 }, { "epoch": 0.00202122283981809, "grad_norm": 0.9037514925003052, "learning_rate": 4e-05, "loss": 43.4688, "step": 2 }, { "epoch": 0.003031834259727135, "grad_norm": 0.9492448568344116, "learning_rate": 6e-05, "loss": 43.4276, "step": 3 }, { "epoch": 0.003031834259727135, "eval_loss": 10.859204292297363, "eval_runtime": 1.1395, "eval_samples_per_second": 365.949, "eval_steps_per_second": 183.413, "step": 3 }, { "epoch": 0.00404244567963618, "grad_norm": 0.9523076415061951, "learning_rate": 8e-05, "loss": 43.4389, "step": 4 }, { "epoch": 0.005053057099545225, "grad_norm": 0.8827975392341614, "learning_rate": 0.0001, "loss": 43.4484, "step": 5 }, { "epoch": 0.00606366851945427, "grad_norm": 0.8885377645492554, "learning_rate": 0.00012, "loss": 43.4533, "step": 6 }, { "epoch": 0.00606366851945427, "eval_loss": 10.857632637023926, "eval_runtime": 1.191, "eval_samples_per_second": 350.112, "eval_steps_per_second": 175.476, "step": 6 }, { "epoch": 0.007074279939363315, "grad_norm": 0.8492110967636108, "learning_rate": 0.00014, "loss": 43.4244, "step": 7 }, { "epoch": 0.00808489135927236, "grad_norm": 0.873717188835144, "learning_rate": 0.00016, "loss": 43.4247, "step": 8 }, { "epoch": 0.009095502779181405, "grad_norm": 0.9080751538276672, "learning_rate": 0.00018, "loss": 43.4278, "step": 9 }, { "epoch": 0.009095502779181405, "eval_loss": 10.854981422424316, "eval_runtime": 1.1223, "eval_samples_per_second": 371.562, "eval_steps_per_second": 186.227, "step": 9 }, { "epoch": 0.01010611419909045, "grad_norm": 0.827238917350769, "learning_rate": 0.0002, "loss": 43.4163, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 423414988800.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }