{
  "best_metric": 1.614416241645813,
  "best_model_checkpoint": "distilbert_weight_decay_final_metrics/checkpoint-6872",
  "epoch": 5.0,
  "global_step": 8590,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.29,
      "learning_rate": 1.970896391152503e-05,
      "loss": 0.0266,
      "step": 500
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.941792782305006e-05,
      "loss": 0.0377,
      "step": 1000
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.9126891734575087e-05,
      "loss": 0.0319,
      "step": 1500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7770232031692134,
      "eval_f1": 0.781143699897714,
      "eval_loss": 1.8788982629776,
      "eval_runtime": 8.0848,
      "eval_samples_per_second": 437.119,
      "eval_steps_per_second": 27.335,
      "step": 1718
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.8835855646100117e-05,
      "loss": 0.0387,
      "step": 2000
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.8544819557625148e-05,
      "loss": 0.0449,
      "step": 2500
    },
    {
      "epoch": 1.75,
      "learning_rate": 1.825378346915018e-05,
      "loss": 0.0541,
      "step": 3000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7744765138653085,
      "eval_f1": 0.778056670029498,
      "eval_loss": 1.7166885137557983,
      "eval_runtime": 8.1001,
      "eval_samples_per_second": 436.288,
      "eval_steps_per_second": 27.283,
      "step": 3436
    },
    {
      "epoch": 2.04,
      "learning_rate": 1.7962747380675206e-05,
      "loss": 0.0534,
      "step": 3500
    },
    {
      "epoch": 2.33,
      "learning_rate": 1.7671711292200233e-05,
      "loss": 0.0394,
      "step": 4000
    },
    {
      "epoch": 2.62,
      "learning_rate": 1.7380675203725264e-05,
      "loss": 0.0573,
      "step": 4500
    },
    {
      "epoch": 2.91,
      "learning_rate": 1.708963911525029e-05,
      "loss": 0.0438,
      "step": 5000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7756083757781551,
      "eval_f1": 0.7790011542334092,
      "eval_loss": 1.7141435146331787,
      "eval_runtime": 8.0923,
      "eval_samples_per_second": 436.712,
      "eval_steps_per_second": 27.31,
      "step": 5154
    },
    {
      "epoch": 3.2,
      "learning_rate": 1.679860302677532e-05,
      "loss": 0.0331,
      "step": 5500
    },
    {
      "epoch": 3.49,
      "learning_rate": 1.650756693830035e-05,
      "loss": 0.0402,
      "step": 6000
    },
    {
      "epoch": 3.78,
      "learning_rate": 1.621653084982538e-05,
      "loss": 0.0388,
      "step": 6500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7761743067345784,
      "eval_f1": 0.7801920701860957,
      "eval_loss": 1.614416241645813,
      "eval_runtime": 8.1981,
      "eval_samples_per_second": 431.077,
      "eval_steps_per_second": 26.958,
      "step": 6872
    },
    {
      "epoch": 4.07,
      "learning_rate": 1.592549476135041e-05,
      "loss": 0.0458,
      "step": 7000
    },
    {
      "epoch": 4.37,
      "learning_rate": 1.5634458672875437e-05,
      "loss": 0.0282,
      "step": 7500
    },
    {
      "epoch": 4.66,
      "learning_rate": 1.5343422584400468e-05,
      "loss": 0.0348,
      "step": 8000
    },
    {
      "epoch": 4.95,
      "learning_rate": 1.5052386495925495e-05,
      "loss": 0.0319,
      "step": 8500
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7849462365591398,
      "eval_f1": 0.7885570926799784,
      "eval_loss": 1.7275830507278442,
      "eval_runtime": 8.0966,
      "eval_samples_per_second": 436.48,
      "eval_steps_per_second": 27.295,
      "step": 8590
    }
  ],
  "max_steps": 34360,
  "num_train_epochs": 20,
  "total_flos": 3910587551729100.0,
  "trial_name": null,
  "trial_params": null
}