|
{ |
|
"best_metric": 0.7970183486238532, |
|
"best_model_checkpoint": "tiny-bert-sst2/run-0/checkpoint-1000", |
|
"epoch": 1.8975332068311195, |
|
"eval_steps": 100, |
|
"global_step": 1000, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18975332068311196, |
|
"grad_norm": 0.7187711000442505, |
|
"learning_rate": 2.4275077059878603e-05, |
|
"loss": 1.2604, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18975332068311196, |
|
"eval_accuracy": 0.6169724770642202, |
|
"eval_loss": 1.181114673614502, |
|
"eval_runtime": 2.4577, |
|
"eval_samples_per_second": 354.806, |
|
"eval_steps_per_second": 2.848, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3795066413662239, |
|
"grad_norm": 1.8982616662979126, |
|
"learning_rate": 2.3789299271608236e-05, |
|
"loss": 1.1808, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3795066413662239, |
|
"eval_accuracy": 0.7121559633027523, |
|
"eval_loss": 1.0743426084518433, |
|
"eval_runtime": 2.4969, |
|
"eval_samples_per_second": 349.235, |
|
"eval_steps_per_second": 2.803, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5692599620493358, |
|
"grad_norm": 2.6504175662994385, |
|
"learning_rate": 2.2812529230666033e-05, |
|
"loss": 1.0583, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5692599620493358, |
|
"eval_accuracy": 0.7591743119266054, |
|
"eval_loss": 0.9464107155799866, |
|
"eval_runtime": 2.4114, |
|
"eval_samples_per_second": 361.611, |
|
"eval_steps_per_second": 2.903, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7590132827324478, |
|
"grad_norm": 2.9992282390594482, |
|
"learning_rate": 2.138601927608547e-05, |
|
"loss": 0.9339, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7590132827324478, |
|
"eval_accuracy": 0.7775229357798165, |
|
"eval_loss": 0.8676859736442566, |
|
"eval_runtime": 2.5023, |
|
"eval_samples_per_second": 348.476, |
|
"eval_steps_per_second": 2.797, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9487666034155597, |
|
"grad_norm": 2.979876756668091, |
|
"learning_rate": 1.9570015801402756e-05, |
|
"loss": 0.8783, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9487666034155597, |
|
"eval_accuracy": 0.786697247706422, |
|
"eval_loss": 0.836247980594635, |
|
"eval_runtime": 2.4279, |
|
"eval_samples_per_second": 359.162, |
|
"eval_steps_per_second": 2.883, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.1385199240986716, |
|
"grad_norm": 3.8972060680389404, |
|
"learning_rate": 1.7441214843436652e-05, |
|
"loss": 0.8286, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.1385199240986716, |
|
"eval_accuracy": 0.786697247706422, |
|
"eval_loss": 0.8169480562210083, |
|
"eval_runtime": 2.4359, |
|
"eval_samples_per_second": 357.985, |
|
"eval_steps_per_second": 2.874, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.3282732447817835, |
|
"grad_norm": 4.094536304473877, |
|
"learning_rate": 1.5089522946380597e-05, |
|
"loss": 0.7808, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.3282732447817835, |
|
"eval_accuracy": 0.7878440366972477, |
|
"eval_loss": 0.8050708770751953, |
|
"eval_runtime": 2.4399, |
|
"eval_samples_per_second": 357.397, |
|
"eval_steps_per_second": 2.869, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.5180265654648957, |
|
"grad_norm": 6.467775344848633, |
|
"learning_rate": 1.2614260100993157e-05, |
|
"loss": 0.765, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.5180265654648957, |
|
"eval_accuracy": 0.7889908256880734, |
|
"eval_loss": 0.7935003042221069, |
|
"eval_runtime": 2.4349, |
|
"eval_samples_per_second": 358.121, |
|
"eval_steps_per_second": 2.875, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.7077798861480076, |
|
"grad_norm": 7.3176445960998535, |
|
"learning_rate": 1.0119965121749416e-05, |
|
"loss": 0.742, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.7077798861480076, |
|
"eval_accuracy": 0.7901376146788991, |
|
"eval_loss": 0.7913622260093689, |
|
"eval_runtime": 2.505, |
|
"eval_samples_per_second": 348.1, |
|
"eval_steps_per_second": 2.794, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8975332068311195, |
|
"grad_norm": 4.671020030975342, |
|
"learning_rate": 7.711980615220306e-06, |
|
"loss": 0.7379, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.8975332068311195, |
|
"eval_accuracy": 0.7970183486238532, |
|
"eval_loss": 0.7775447964668274, |
|
"eval_runtime": 2.4368, |
|
"eval_samples_per_second": 357.839, |
|
"eval_steps_per_second": 2.873, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 1581, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 15348240276300.0, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.8670086094179865, |
|
"learning_rate": 2.432723709409012e-05, |
|
"num_train_epochs": 3, |
|
"temperature": 13, |
|
"warmup_ratio": 0.03434495454544933 |
|
} |
|
} |
|
|