{ "best_metric": 0.7981651376146789, "best_model_checkpoint": "tiny-bert-sst2/run-0/checkpoint-1500", "epoch": 3.0, "eval_steps": 100, "global_step": 1581, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18975332068311196, "grad_norm": 0.7187711000442505, "learning_rate": 2.4275077059878603e-05, "loss": 1.2604, "step": 100 }, { "epoch": 0.18975332068311196, "eval_accuracy": 0.6169724770642202, "eval_loss": 1.181114673614502, "eval_runtime": 2.4577, "eval_samples_per_second": 354.806, "eval_steps_per_second": 2.848, "step": 100 }, { "epoch": 0.3795066413662239, "grad_norm": 1.8982616662979126, "learning_rate": 2.3789299271608236e-05, "loss": 1.1808, "step": 200 }, { "epoch": 0.3795066413662239, "eval_accuracy": 0.7121559633027523, "eval_loss": 1.0743426084518433, "eval_runtime": 2.4969, "eval_samples_per_second": 349.235, "eval_steps_per_second": 2.803, "step": 200 }, { "epoch": 0.5692599620493358, "grad_norm": 2.6504175662994385, "learning_rate": 2.2812529230666033e-05, "loss": 1.0583, "step": 300 }, { "epoch": 0.5692599620493358, "eval_accuracy": 0.7591743119266054, "eval_loss": 0.9464107155799866, "eval_runtime": 2.4114, "eval_samples_per_second": 361.611, "eval_steps_per_second": 2.903, "step": 300 }, { "epoch": 0.7590132827324478, "grad_norm": 2.9992282390594482, "learning_rate": 2.138601927608547e-05, "loss": 0.9339, "step": 400 }, { "epoch": 0.7590132827324478, "eval_accuracy": 0.7775229357798165, "eval_loss": 0.8676859736442566, "eval_runtime": 2.5023, "eval_samples_per_second": 348.476, "eval_steps_per_second": 2.797, "step": 400 }, { "epoch": 0.9487666034155597, "grad_norm": 2.979876756668091, "learning_rate": 1.9570015801402756e-05, "loss": 0.8783, "step": 500 }, { "epoch": 0.9487666034155597, "eval_accuracy": 0.786697247706422, "eval_loss": 0.836247980594635, "eval_runtime": 2.4279, "eval_samples_per_second": 359.162, "eval_steps_per_second": 2.883, "step": 500 }, { "epoch": 1.1385199240986716, "grad_norm": 3.8972060680389404, "learning_rate": 1.7441214843436652e-05, "loss": 0.8286, "step": 600 }, { "epoch": 1.1385199240986716, "eval_accuracy": 0.786697247706422, "eval_loss": 0.8169480562210083, "eval_runtime": 2.4359, "eval_samples_per_second": 357.985, "eval_steps_per_second": 2.874, "step": 600 }, { "epoch": 1.3282732447817835, "grad_norm": 4.094536304473877, "learning_rate": 1.5089522946380597e-05, "loss": 0.7808, "step": 700 }, { "epoch": 1.3282732447817835, "eval_accuracy": 0.7878440366972477, "eval_loss": 0.8050708770751953, "eval_runtime": 2.4399, "eval_samples_per_second": 357.397, "eval_steps_per_second": 2.869, "step": 700 }, { "epoch": 1.5180265654648957, "grad_norm": 6.467775344848633, "learning_rate": 1.2614260100993157e-05, "loss": 0.765, "step": 800 }, { "epoch": 1.5180265654648957, "eval_accuracy": 0.7889908256880734, "eval_loss": 0.7935003042221069, "eval_runtime": 2.4349, "eval_samples_per_second": 358.121, "eval_steps_per_second": 2.875, "step": 800 }, { "epoch": 1.7077798861480076, "grad_norm": 7.3176445960998535, "learning_rate": 1.0119965121749416e-05, "loss": 0.742, "step": 900 }, { "epoch": 1.7077798861480076, "eval_accuracy": 0.7901376146788991, "eval_loss": 0.7913622260093689, "eval_runtime": 2.505, "eval_samples_per_second": 348.1, "eval_steps_per_second": 2.794, "step": 900 }, { "epoch": 1.8975332068311195, "grad_norm": 4.671020030975342, "learning_rate": 7.711980615220306e-06, "loss": 0.7379, "step": 1000 }, { "epoch": 1.8975332068311195, "eval_accuracy": 0.7970183486238532, "eval_loss": 0.7775447964668274, "eval_runtime": 2.4368, "eval_samples_per_second": 357.839, "eval_steps_per_second": 2.873, "step": 1000 }, { "epoch": 2.0872865275142316, "grad_norm": 3.548794984817505, "learning_rate": 5.492004001563045e-06, "loss": 0.7169, "step": 1100 }, { "epoch": 2.0872865275142316, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.7763888239860535, "eval_runtime": 2.4327, "eval_samples_per_second": 358.447, "eval_steps_per_second": 2.877, "step": 1100 }, { "epoch": 2.2770398481973433, "grad_norm": 2.657698392868042, "learning_rate": 3.5537924846990686e-06, "loss": 0.7053, "step": 1200 }, { "epoch": 2.2770398481973433, "eval_accuracy": 0.8004587155963303, "eval_loss": 0.7718890905380249, "eval_runtime": 2.4731, "eval_samples_per_second": 352.601, "eval_steps_per_second": 2.831, "step": 1200 }, { "epoch": 2.4667931688804554, "grad_norm": 8.931977272033691, "learning_rate": 1.9792033649762944e-06, "loss": 0.7001, "step": 1300 }, { "epoch": 2.4667931688804554, "eval_accuracy": 0.7958715596330275, "eval_loss": 0.7761191129684448, "eval_runtime": 2.4453, "eval_samples_per_second": 356.608, "eval_steps_per_second": 2.863, "step": 1300 }, { "epoch": 2.656546489563567, "grad_norm": 5.575241565704346, "learning_rate": 8.347369254527042e-07, "loss": 0.6988, "step": 1400 }, { "epoch": 2.656546489563567, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.7722569108009338, "eval_runtime": 2.492, "eval_samples_per_second": 349.916, "eval_steps_per_second": 2.809, "step": 1400 }, { "epoch": 2.846299810246679, "grad_norm": 6.262284278869629, "learning_rate": 1.6872789753562043e-07, "loss": 0.7135, "step": 1500 }, { "epoch": 2.846299810246679, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.7717686891555786, "eval_runtime": 2.443, "eval_samples_per_second": 356.945, "eval_steps_per_second": 2.865, "step": 1500 } ], "logging_steps": 100, "max_steps": 1581, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 23032282372320.0, "train_batch_size": 128, "trial_name": null, "trial_params": { "alpha": 0.8670086094179865, "learning_rate": 2.432723709409012e-05, "num_train_epochs": 3, "temperature": 13, "warmup_ratio": 0.03434495454544933 } }