|
{ |
|
"best_metric": 0.786697247706422, |
|
"best_model_checkpoint": "tiny-bert-sst2/run-0/checkpoint-500", |
|
"epoch": 0.9487666034155597, |
|
"eval_steps": 100, |
|
"global_step": 500, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18975332068311196, |
|
"grad_norm": 0.7187711000442505, |
|
"learning_rate": 2.4275077059878603e-05, |
|
"loss": 1.2604, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18975332068311196, |
|
"eval_accuracy": 0.6169724770642202, |
|
"eval_loss": 1.181114673614502, |
|
"eval_runtime": 2.4577, |
|
"eval_samples_per_second": 354.806, |
|
"eval_steps_per_second": 2.848, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3795066413662239, |
|
"grad_norm": 1.8982616662979126, |
|
"learning_rate": 2.3789299271608236e-05, |
|
"loss": 1.1808, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3795066413662239, |
|
"eval_accuracy": 0.7121559633027523, |
|
"eval_loss": 1.0743426084518433, |
|
"eval_runtime": 2.4969, |
|
"eval_samples_per_second": 349.235, |
|
"eval_steps_per_second": 2.803, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5692599620493358, |
|
"grad_norm": 2.6504175662994385, |
|
"learning_rate": 2.2812529230666033e-05, |
|
"loss": 1.0583, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5692599620493358, |
|
"eval_accuracy": 0.7591743119266054, |
|
"eval_loss": 0.9464107155799866, |
|
"eval_runtime": 2.4114, |
|
"eval_samples_per_second": 361.611, |
|
"eval_steps_per_second": 2.903, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7590132827324478, |
|
"grad_norm": 2.9992282390594482, |
|
"learning_rate": 2.138601927608547e-05, |
|
"loss": 0.9339, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7590132827324478, |
|
"eval_accuracy": 0.7775229357798165, |
|
"eval_loss": 0.8676859736442566, |
|
"eval_runtime": 2.5023, |
|
"eval_samples_per_second": 348.476, |
|
"eval_steps_per_second": 2.797, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9487666034155597, |
|
"grad_norm": 2.979876756668091, |
|
"learning_rate": 1.9570015801402756e-05, |
|
"loss": 0.8783, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9487666034155597, |
|
"eval_accuracy": 0.786697247706422, |
|
"eval_loss": 0.836247980594635, |
|
"eval_runtime": 2.4279, |
|
"eval_samples_per_second": 359.162, |
|
"eval_steps_per_second": 2.883, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 1581, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7672788856320.0, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.8670086094179865, |
|
"learning_rate": 2.432723709409012e-05, |
|
"num_train_epochs": 3, |
|
"temperature": 13, |
|
"warmup_ratio": 0.03434495454544933 |
|
} |
|
} |
|
|