|
{ |
|
"best_metric": 0.8126666666666666, |
|
"best_model_checkpoint": "Bert_tinybert-distilled/run-32/checkpoint-860", |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 1032, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.8815119862556458, |
|
"learning_rate": 0.00019666908200653298, |
|
"loss": 0.5311, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_AUC": 0.8264693333333334, |
|
"eval_BAC": 0.7413333333333333, |
|
"eval_MCC": 0.48401777889745745, |
|
"eval_PR_AUC": 0.7834479949491041, |
|
"eval_accuracy": 0.7413333333333333, |
|
"eval_loss": 0.47986286878585815, |
|
"eval_runtime": 6.254, |
|
"eval_samples_per_second": 239.845, |
|
"eval_steps_per_second": 7.515, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.847335696220398, |
|
"learning_rate": 0.00016389090167211084, |
|
"loss": 0.4906, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_AUC": 0.8593288888888888, |
|
"eval_BAC": 0.7753333333333333, |
|
"eval_MCC": 0.5809536114294386, |
|
"eval_PR_AUC": 0.8208211551656307, |
|
"eval_accuracy": 0.7753333333333333, |
|
"eval_loss": 0.46062198281288147, |
|
"eval_runtime": 6.2859, |
|
"eval_samples_per_second": 238.629, |
|
"eval_steps_per_second": 7.477, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.6832430362701416, |
|
"learning_rate": 0.00013111272133768865, |
|
"loss": 0.4723, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_AUC": 0.8711697777777777, |
|
"eval_BAC": 0.8086666666666666, |
|
"eval_MCC": 0.6201182679233241, |
|
"eval_PR_AUC": 0.8332422338463044, |
|
"eval_accuracy": 0.8086666666666666, |
|
"eval_loss": 0.42795059084892273, |
|
"eval_runtime": 6.25, |
|
"eval_samples_per_second": 240.001, |
|
"eval_steps_per_second": 7.52, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.4761440753936768, |
|
"learning_rate": 9.833454100326649e-05, |
|
"loss": 0.4616, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_AUC": 0.8759911111111109, |
|
"eval_BAC": 0.808, |
|
"eval_MCC": 0.6199002844761639, |
|
"eval_PR_AUC": 0.8424113281283504, |
|
"eval_accuracy": 0.808, |
|
"eval_loss": 0.4210560619831085, |
|
"eval_runtime": 6.2121, |
|
"eval_samples_per_second": 241.463, |
|
"eval_steps_per_second": 7.566, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.6606786251068115, |
|
"learning_rate": 6.555636066884433e-05, |
|
"loss": 0.4489, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_AUC": 0.8827644444444446, |
|
"eval_BAC": 0.8126666666666666, |
|
"eval_MCC": 0.6317961066346743, |
|
"eval_PR_AUC": 0.8607459277405258, |
|
"eval_accuracy": 0.8126666666666666, |
|
"eval_loss": 0.4215475618839264, |
|
"eval_runtime": 6.2337, |
|
"eval_samples_per_second": 240.629, |
|
"eval_steps_per_second": 7.54, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.446411371231079, |
|
"learning_rate": 3.277818033442216e-05, |
|
"loss": 0.445, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_AUC": 0.881552, |
|
"eval_BAC": 0.8106666666666666, |
|
"eval_MCC": 0.628637794247896, |
|
"eval_PR_AUC": 0.8568648106694358, |
|
"eval_accuracy": 0.8106666666666666, |
|
"eval_loss": 0.42053866386413574, |
|
"eval_runtime": 6.2148, |
|
"eval_samples_per_second": 241.361, |
|
"eval_steps_per_second": 7.563, |
|
"step": 1032 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1204, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2702266380000.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.8238347003864188, |
|
"learning_rate": 0.00022944726234095517, |
|
"num_train_epochs": 7, |
|
"temperature": 4 |
|
} |
|
} |
|
|