|
{ |
|
"best_metric": 0.3793538836642285, |
|
"best_model_checkpoint": "/xdisk/msurdeanu/enoriega/kw_pubmed/kw_pubmed_1000_0.00006/checkpoint-12", |
|
"epoch": 1.1524390243902438, |
|
"global_step": 52, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.3767850923023337, |
|
"eval_loss": 3.902531385421753, |
|
"eval_runtime": 16.573, |
|
"eval_samples_per_second": 603.393, |
|
"eval_steps_per_second": 18.886, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5.8666666666666665e-05, |
|
"loss": 4.0323, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.37399860675722746, |
|
"eval_loss": 3.8996994495391846, |
|
"eval_runtime": 16.5623, |
|
"eval_samples_per_second": 603.78, |
|
"eval_steps_per_second": 18.898, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5.7333333333333336e-05, |
|
"loss": 3.4655, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.3793538836642285, |
|
"eval_loss": 3.9113454818725586, |
|
"eval_runtime": 16.5561, |
|
"eval_samples_per_second": 604.006, |
|
"eval_steps_per_second": 18.905, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.6e-05, |
|
"loss": 3.1532, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.3754353883664229, |
|
"eval_loss": 3.9654853343963623, |
|
"eval_runtime": 16.5341, |
|
"eval_samples_per_second": 604.812, |
|
"eval_steps_per_second": 18.931, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.466666666666667e-05, |
|
"loss": 2.9457, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.3714733542319749, |
|
"eval_loss": 4.051833629608154, |
|
"eval_runtime": 16.5251, |
|
"eval_samples_per_second": 605.14, |
|
"eval_steps_per_second": 18.941, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.36964472309299895, |
|
"eval_loss": 4.067558765411377, |
|
"eval_runtime": 16.4933, |
|
"eval_samples_per_second": 606.306, |
|
"eval_steps_per_second": 18.977, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5.36e-05, |
|
"loss": 2.7639, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.3722570532915361, |
|
"eval_loss": 4.092674255371094, |
|
"eval_runtime": 16.5366, |
|
"eval_samples_per_second": 604.721, |
|
"eval_steps_per_second": 18.928, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.2266666666666665e-05, |
|
"loss": 2.6542, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.37169104841518635, |
|
"eval_loss": 4.114505290985107, |
|
"eval_runtime": 16.541, |
|
"eval_samples_per_second": 604.559, |
|
"eval_steps_per_second": 18.923, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.0933333333333336e-05, |
|
"loss": 2.5371, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.3714733542319749, |
|
"eval_loss": 4.133761882781982, |
|
"eval_runtime": 16.5877, |
|
"eval_samples_per_second": 602.858, |
|
"eval_steps_per_second": 18.869, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.96e-05, |
|
"loss": 2.4884, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.3642894461859979, |
|
"eval_loss": 4.216277599334717, |
|
"eval_runtime": 16.5043, |
|
"eval_samples_per_second": 605.903, |
|
"eval_steps_per_second": 18.965, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.36920933472657613, |
|
"eval_loss": 4.178205490112305, |
|
"eval_runtime": 16.5055, |
|
"eval_samples_per_second": 605.86, |
|
"eval_steps_per_second": 18.963, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.826666666666666e-05, |
|
"loss": 2.3936, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.3670323928944619, |
|
"eval_loss": 4.243963718414307, |
|
"eval_runtime": 16.5457, |
|
"eval_samples_per_second": 604.386, |
|
"eval_steps_per_second": 18.917, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.693333333333334e-05, |
|
"loss": 2.694, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.36733716475095785, |
|
"eval_loss": 4.244164943695068, |
|
"eval_runtime": 16.5637, |
|
"eval_samples_per_second": 603.731, |
|
"eval_steps_per_second": 18.897, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"step": 52, |
|
"total_flos": 3.18987289303776e+16, |
|
"train_loss": 2.8869726932965794, |
|
"train_runtime": 1925.7546, |
|
"train_samples_per_second": 953.779, |
|
"train_steps_per_second": 0.117 |
|
} |
|
], |
|
"max_steps": 225, |
|
"num_train_epochs": 5, |
|
"total_flos": 3.18987289303776e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|