|
{ |
|
"best_metric": 1.1420139074325562, |
|
"best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_data_aug_qnli/checkpoint-33208", |
|
"epoch": 6.0, |
|
"global_step": 199248, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.6899, |
|
"step": 33208 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5923485264506682, |
|
"eval_loss": 1.1420139074325562, |
|
"eval_runtime": 12.6193, |
|
"eval_samples_per_second": 432.909, |
|
"eval_steps_per_second": 3.407, |
|
"step": 33208 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.498, |
|
"step": 66416 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5943620721215449, |
|
"eval_loss": 1.219602346420288, |
|
"eval_runtime": 12.6869, |
|
"eval_samples_per_second": 430.602, |
|
"eval_steps_per_second": 3.389, |
|
"step": 66416 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.4209, |
|
"step": 99624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5976569650375252, |
|
"eval_loss": 1.2370126247406006, |
|
"eval_runtime": 12.7229, |
|
"eval_samples_per_second": 429.383, |
|
"eval_steps_per_second": 3.38, |
|
"step": 99624 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.3746, |
|
"step": 132832 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5972908658246385, |
|
"eval_loss": 1.2783989906311035, |
|
"eval_runtime": 12.6633, |
|
"eval_samples_per_second": 431.405, |
|
"eval_steps_per_second": 3.396, |
|
"step": 132832 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.3449, |
|
"step": 166040 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5938129233022149, |
|
"eval_loss": 1.2648983001708984, |
|
"eval_runtime": 12.6182, |
|
"eval_samples_per_second": 432.944, |
|
"eval_steps_per_second": 3.408, |
|
"step": 166040 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.3238, |
|
"step": 199248 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6113856855207761, |
|
"eval_loss": 1.1661865711212158, |
|
"eval_runtime": 12.5606, |
|
"eval_samples_per_second": 434.933, |
|
"eval_steps_per_second": 3.423, |
|
"step": 199248 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 199248, |
|
"total_flos": 7.996297956242227e+17, |
|
"train_loss": 0.44199674663453337, |
|
"train_runtime": 148025.2261, |
|
"train_samples_per_second": 1435.739, |
|
"train_steps_per_second": 11.217 |
|
} |
|
], |
|
"max_steps": 1660400, |
|
"num_train_epochs": 50, |
|
"total_flos": 7.996297956242227e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|