{
  "best_metric": 1.2555217742919922,
  "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_mnli_128/checkpoint-33748",
  "epoch": 16.0,
  "global_step": 49088,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 4.9e-05,
      "loss": 1.6825,
      "step": 3068
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5256240448293429,
      "eval_loss": 1.4580906629562378,
      "eval_runtime": 17.2992,
      "eval_samples_per_second": 567.368,
      "eval_steps_per_second": 4.451,
      "step": 3068
    },
    {
      "epoch": 2.0,
      "learning_rate": 4.8e-05,
      "loss": 1.4941,
      "step": 6136
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.5680081507896078,
      "eval_loss": 1.3516298532485962,
      "eval_runtime": 17.2707,
      "eval_samples_per_second": 568.305,
      "eval_steps_per_second": 4.458,
      "step": 6136
    },
    {
      "epoch": 3.0,
      "learning_rate": 4.7e-05,
      "loss": 1.4199,
      "step": 9204
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.5711665817626083,
      "eval_loss": 1.325850486755371,
      "eval_runtime": 17.2788,
      "eval_samples_per_second": 568.038,
      "eval_steps_per_second": 4.456,
      "step": 9204
    },
    {
      "epoch": 4.0,
      "learning_rate": 4.600000000000001e-05,
      "loss": 1.3747,
      "step": 12272
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.5856342333163526,
      "eval_loss": 1.3024353981018066,
      "eval_runtime": 17.2902,
      "eval_samples_per_second": 567.664,
      "eval_steps_per_second": 4.453,
      "step": 12272
    },
    {
      "epoch": 5.0,
      "learning_rate": 4.5e-05,
      "loss": 1.34,
      "step": 15340
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.5930718288334182,
      "eval_loss": 1.2874709367752075,
      "eval_runtime": 18.1448,
      "eval_samples_per_second": 540.927,
      "eval_steps_per_second": 4.244,
      "step": 15340
    },
    {
      "epoch": 6.0,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 1.3087,
      "step": 18408
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.5927661742231278,
      "eval_loss": 1.2729827165603638,
      "eval_runtime": 17.888,
      "eval_samples_per_second": 548.691,
      "eval_steps_per_second": 4.305,
      "step": 18408
    },
    {
      "epoch": 7.0,
      "learning_rate": 4.3e-05,
      "loss": 1.2769,
      "step": 21476
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.5916454406520631,
      "eval_loss": 1.2844635248184204,
      "eval_runtime": 17.8702,
      "eval_samples_per_second": 549.237,
      "eval_steps_per_second": 4.309,
      "step": 21476
    },
    {
      "epoch": 8.0,
      "learning_rate": 4.2e-05,
      "loss": 1.246,
      "step": 24544
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.5965359144167092,
      "eval_loss": 1.2749732732772827,
      "eval_runtime": 17.7491,
      "eval_samples_per_second": 552.985,
      "eval_steps_per_second": 4.338,
      "step": 24544
    },
    {
      "epoch": 9.0,
      "learning_rate": 4.1e-05,
      "loss": 1.2166,
      "step": 27612
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.6020376974019358,
      "eval_loss": 1.2651457786560059,
      "eval_runtime": 17.7969,
      "eval_samples_per_second": 551.501,
      "eval_steps_per_second": 4.327,
      "step": 27612
    },
    {
      "epoch": 10.0,
      "learning_rate": 4e-05,
      "loss": 1.1883,
      "step": 30680
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.6042791645440652,
      "eval_loss": 1.2773230075836182,
      "eval_runtime": 17.8377,
      "eval_samples_per_second": 550.239,
      "eval_steps_per_second": 4.317,
      "step": 30680
    },
    {
      "epoch": 11.0,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 1.1604,
      "step": 33748
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.6011207335710647,
      "eval_loss": 1.2555217742919922,
      "eval_runtime": 17.9826,
      "eval_samples_per_second": 545.807,
      "eval_steps_per_second": 4.282,
      "step": 33748
    },
    {
      "epoch": 12.0,
      "learning_rate": 3.8e-05,
      "loss": 1.1329,
      "step": 36816
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.5990830361691288,
      "eval_loss": 1.2792441844940186,
      "eval_runtime": 17.9163,
      "eval_samples_per_second": 547.824,
      "eval_steps_per_second": 4.298,
      "step": 36816
    },
    {
      "epoch": 13.0,
      "learning_rate": 3.7e-05,
      "loss": 1.1074,
      "step": 39884
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.5985736118186449,
      "eval_loss": 1.2890774011611938,
      "eval_runtime": 17.8675,
      "eval_samples_per_second": 549.32,
      "eval_steps_per_second": 4.309,
      "step": 39884
    },
    {
      "epoch": 14.0,
      "learning_rate": 3.6e-05,
      "loss": 1.0812,
      "step": 42952
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.5947019867549669,
      "eval_loss": 1.2889435291290283,
      "eval_runtime": 17.9697,
      "eval_samples_per_second": 546.196,
      "eval_steps_per_second": 4.285,
      "step": 42952
    },
    {
      "epoch": 15.0,
      "learning_rate": 3.5e-05,
      "loss": 1.0577,
      "step": 46020
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.5970453387671931,
      "eval_loss": 1.2870742082595825,
      "eval_runtime": 18.0046,
      "eval_samples_per_second": 545.14,
      "eval_steps_per_second": 4.277,
      "step": 46020
    },
    {
      "epoch": 16.0,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 1.0338,
      "step": 49088
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.6026490066225165,
      "eval_loss": 1.3296325206756592,
      "eval_runtime": 17.9586,
      "eval_samples_per_second": 546.536,
      "eval_steps_per_second": 4.288,
      "step": 49088
    },
    {
      "epoch": 16.0,
      "step": 49088,
      "total_flos": 1.4975953101704397e+17,
      "train_loss": 1.2575687075220776,
      "train_runtime": 27523.0396,
      "train_samples_per_second": 713.406,
      "train_steps_per_second": 5.574
    }
  ],
  "max_steps": 153400,
  "num_train_epochs": 50,
  "total_flos": 1.4975953101704397e+17,
  "trial_name": null,
  "trial_params": null
}
|
|