|
{ |
|
"best_metric": 2.2965381145477295, |
|
"best_model_checkpoint": "mobilebert_add_GLUE_Experiment_stsb_256/checkpoint-135", |
|
"epoch": 8.0, |
|
"global_step": 360, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 4.3289, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_combined_score": 0.016869338769549977, |
|
"eval_loss": 2.5185155868530273, |
|
"eval_pearson": 0.015152034772122189, |
|
"eval_runtime": 2.6668, |
|
"eval_samples_per_second": 562.482, |
|
"eval_spearmanr": 0.018586642766977766, |
|
"eval_steps_per_second": 4.5, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 2.1532, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_combined_score": 0.031155825147051168, |
|
"eval_loss": 2.5569276809692383, |
|
"eval_pearson": 0.03878201425375371, |
|
"eval_runtime": 2.6592, |
|
"eval_samples_per_second": 564.09, |
|
"eval_spearmanr": 0.023529636040348627, |
|
"eval_steps_per_second": 4.513, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 2.1161, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_combined_score": 0.030822169467473876, |
|
"eval_loss": 2.2965381145477295, |
|
"eval_pearson": 0.039909131733008196, |
|
"eval_runtime": 2.7354, |
|
"eval_samples_per_second": 548.375, |
|
"eval_spearmanr": 0.02173520720193955, |
|
"eval_steps_per_second": 4.387, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 2.1083, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_combined_score": 0.034664425266159364, |
|
"eval_loss": 2.4191055297851562, |
|
"eval_pearson": 0.0409414178434408, |
|
"eval_runtime": 2.6813, |
|
"eval_samples_per_second": 559.437, |
|
"eval_spearmanr": 0.028387432688877935, |
|
"eval_steps_per_second": 4.475, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 2.0885, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_combined_score": 0.04204090753651958, |
|
"eval_loss": 2.5610482692718506, |
|
"eval_pearson": 0.0458438458890056, |
|
"eval_runtime": 2.7067, |
|
"eval_samples_per_second": 554.172, |
|
"eval_spearmanr": 0.03823796918403356, |
|
"eval_steps_per_second": 4.433, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 2.0602, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_combined_score": 0.0458476409316318, |
|
"eval_loss": 2.378797769546509, |
|
"eval_pearson": 0.047272793574367096, |
|
"eval_runtime": 2.6789, |
|
"eval_samples_per_second": 559.941, |
|
"eval_spearmanr": 0.04442248828889651, |
|
"eval_steps_per_second": 4.48, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 2.0283, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_combined_score": 0.05492669237451248, |
|
"eval_loss": 2.4458703994750977, |
|
"eval_pearson": 0.05710597845528931, |
|
"eval_runtime": 2.6878, |
|
"eval_samples_per_second": 558.076, |
|
"eval_spearmanr": 0.05274740629373566, |
|
"eval_steps_per_second": 4.465, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 1.9677, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_combined_score": 0.06091007949651277, |
|
"eval_loss": 2.4995479583740234, |
|
"eval_pearson": 0.06144666895354363, |
|
"eval_runtime": 2.6805, |
|
"eval_samples_per_second": 559.594, |
|
"eval_spearmanr": 0.060373490039481906, |
|
"eval_steps_per_second": 4.477, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 360, |
|
"total_flos": 1185435401322496.0, |
|
"train_loss": 2.3563860363430447, |
|
"train_runtime": 347.9343, |
|
"train_samples_per_second": 826.162, |
|
"train_steps_per_second": 6.467 |
|
} |
|
], |
|
"max_steps": 2250, |
|
"num_train_epochs": 50, |
|
"total_flos": 1185435401322496.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|