{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.013020303535826179, "eval_steps": 5, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006510151767913089, "grad_norm": 17.231752395629883, "learning_rate": 1e-05, "loss": 44.8594, "step": 1 }, { "epoch": 0.0006510151767913089, "eval_loss": 2.7306196689605713, "eval_runtime": 22.5604, "eval_samples_per_second": 114.714, "eval_steps_per_second": 57.357, "step": 1 }, { "epoch": 0.0013020303535826179, "grad_norm": 25.99519920349121, "learning_rate": 2e-05, "loss": 48.9219, "step": 2 }, { "epoch": 0.001953045530373927, "grad_norm": 25.249834060668945, "learning_rate": 3e-05, "loss": 43.4375, "step": 3 }, { "epoch": 0.0026040607071652357, "grad_norm": 21.748672485351562, "learning_rate": 4e-05, "loss": 47.7812, "step": 4 }, { "epoch": 0.0032550758839565447, "grad_norm": 31.247392654418945, "learning_rate": 5e-05, "loss": 49.4531, "step": 5 }, { "epoch": 0.0032550758839565447, "eval_loss": 2.716805934906006, "eval_runtime": 11.3368, "eval_samples_per_second": 228.283, "eval_steps_per_second": 114.141, "step": 5 }, { "epoch": 0.003906091060747854, "grad_norm": 15.069538116455078, "learning_rate": 6e-05, "loss": 43.7188, "step": 6 }, { "epoch": 0.004557106237539163, "grad_norm": 23.0030460357666, "learning_rate": 7e-05, "loss": 45.2891, "step": 7 }, { "epoch": 0.005208121414330471, "grad_norm": 25.9981689453125, "learning_rate": 8e-05, "loss": 48.8125, "step": 8 }, { "epoch": 0.005859136591121781, "grad_norm": 13.974116325378418, "learning_rate": 9e-05, "loss": 44.5156, "step": 9 }, { "epoch": 0.0065101517679130895, "grad_norm": 27.361398696899414, "learning_rate": 0.0001, "loss": 47.6875, "step": 10 }, { "epoch": 0.0065101517679130895, "eval_loss": 2.6825039386749268, "eval_runtime": 11.3339, "eval_samples_per_second": 228.341, "eval_steps_per_second": 114.17, "step": 10 }, { "epoch": 0.007161166944704398, "grad_norm": 16.357383728027344, "learning_rate": 9.755282581475769e-05, "loss": 42.9062, "step": 11 }, { "epoch": 0.007812182121495708, "grad_norm": 15.48184585571289, "learning_rate": 9.045084971874738e-05, "loss": 44.2656, "step": 12 }, { "epoch": 0.008463197298287017, "grad_norm": 15.607367515563965, "learning_rate": 7.938926261462366e-05, "loss": 45.2188, "step": 13 }, { "epoch": 0.009114212475078326, "grad_norm": 15.278877258300781, "learning_rate": 6.545084971874738e-05, "loss": 43.9609, "step": 14 }, { "epoch": 0.009765227651869634, "grad_norm": 17.62762451171875, "learning_rate": 5e-05, "loss": 45.7188, "step": 15 }, { "epoch": 0.009765227651869634, "eval_loss": 2.6543149948120117, "eval_runtime": 11.2253, "eval_samples_per_second": 230.55, "eval_steps_per_second": 115.275, "step": 15 }, { "epoch": 0.010416242828660943, "grad_norm": 16.40966033935547, "learning_rate": 3.4549150281252636e-05, "loss": 41.2578, "step": 16 }, { "epoch": 0.011067258005452251, "grad_norm": 15.669730186462402, "learning_rate": 2.061073738537635e-05, "loss": 44.1328, "step": 17 }, { "epoch": 0.011718273182243562, "grad_norm": 16.64231300354004, "learning_rate": 9.549150281252633e-06, "loss": 44.6562, "step": 18 }, { "epoch": 0.01236928835903487, "grad_norm": 15.17128849029541, "learning_rate": 2.4471741852423237e-06, "loss": 41.8594, "step": 19 }, { "epoch": 0.013020303535826179, "grad_norm": 17.088932037353516, "learning_rate": 0.0, "loss": 45.0234, "step": 20 }, { "epoch": 0.013020303535826179, "eval_loss": 2.647012710571289, "eval_runtime": 16.1732, "eval_samples_per_second": 160.017, "eval_steps_per_second": 80.009, "step": 20 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 171959044276224.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }