{ "best_metric": 0.786697247706422, "best_model_checkpoint": "tiny-bert-sst2/run-0/checkpoint-500", "epoch": 0.9487666034155597, "eval_steps": 100, "global_step": 500, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18975332068311196, "grad_norm": 0.7187711000442505, "learning_rate": 2.4275077059878603e-05, "loss": 1.2604, "step": 100 }, { "epoch": 0.18975332068311196, "eval_accuracy": 0.6169724770642202, "eval_loss": 1.181114673614502, "eval_runtime": 2.4577, "eval_samples_per_second": 354.806, "eval_steps_per_second": 2.848, "step": 100 }, { "epoch": 0.3795066413662239, "grad_norm": 1.8982616662979126, "learning_rate": 2.3789299271608236e-05, "loss": 1.1808, "step": 200 }, { "epoch": 0.3795066413662239, "eval_accuracy": 0.7121559633027523, "eval_loss": 1.0743426084518433, "eval_runtime": 2.4969, "eval_samples_per_second": 349.235, "eval_steps_per_second": 2.803, "step": 200 }, { "epoch": 0.5692599620493358, "grad_norm": 2.6504175662994385, "learning_rate": 2.2812529230666033e-05, "loss": 1.0583, "step": 300 }, { "epoch": 0.5692599620493358, "eval_accuracy": 0.7591743119266054, "eval_loss": 0.9464107155799866, "eval_runtime": 2.4114, "eval_samples_per_second": 361.611, "eval_steps_per_second": 2.903, "step": 300 }, { "epoch": 0.7590132827324478, "grad_norm": 2.9992282390594482, "learning_rate": 2.138601927608547e-05, "loss": 0.9339, "step": 400 }, { "epoch": 0.7590132827324478, "eval_accuracy": 0.7775229357798165, "eval_loss": 0.8676859736442566, "eval_runtime": 2.5023, "eval_samples_per_second": 348.476, "eval_steps_per_second": 2.797, "step": 400 }, { "epoch": 0.9487666034155597, "grad_norm": 2.979876756668091, "learning_rate": 1.9570015801402756e-05, "loss": 0.8783, "step": 500 }, { "epoch": 0.9487666034155597, "eval_accuracy": 0.786697247706422, "eval_loss": 0.836247980594635, "eval_runtime": 2.4279, "eval_samples_per_second": 359.162, "eval_steps_per_second": 2.883, "step": 500 } ], "logging_steps": 100, "max_steps": 1581, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7672788856320.0, "train_batch_size": 128, "trial_name": null, "trial_params": { "alpha": 0.8670086094179865, "learning_rate": 2.432723709409012e-05, "num_train_epochs": 3, "temperature": 13, "warmup_ratio": 0.03434495454544933 } }