|
{
  "best_metric": 1.614416241645813,
  "best_model_checkpoint": "distilbert_weight_decay_final_metrics/checkpoint-6872",
  "epoch": 8.0,
  "global_step": 13744,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.29,
      "learning_rate": 1.970896391152503e-05,
      "loss": 0.0266,
      "step": 500
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.941792782305006e-05,
      "loss": 0.0377,
      "step": 1000
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.9126891734575087e-05,
      "loss": 0.0319,
      "step": 1500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7770232031692134,
      "eval_f1": 0.781143699897714,
      "eval_loss": 1.8788982629776,
      "eval_runtime": 8.0848,
      "eval_samples_per_second": 437.119,
      "eval_steps_per_second": 27.335,
      "step": 1718
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.8835855646100117e-05,
      "loss": 0.0387,
      "step": 2000
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.8544819557625148e-05,
      "loss": 0.0449,
      "step": 2500
    },
    {
      "epoch": 1.75,
      "learning_rate": 1.825378346915018e-05,
      "loss": 0.0541,
      "step": 3000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7744765138653085,
      "eval_f1": 0.778056670029498,
      "eval_loss": 1.7166885137557983,
      "eval_runtime": 8.1001,
      "eval_samples_per_second": 436.288,
      "eval_steps_per_second": 27.283,
      "step": 3436
    },
    {
      "epoch": 2.04,
      "learning_rate": 1.7962747380675206e-05,
      "loss": 0.0534,
      "step": 3500
    },
    {
      "epoch": 2.33,
      "learning_rate": 1.7671711292200233e-05,
      "loss": 0.0394,
      "step": 4000
    },
    {
      "epoch": 2.62,
      "learning_rate": 1.7380675203725264e-05,
      "loss": 0.0573,
      "step": 4500
    },
    {
      "epoch": 2.91,
      "learning_rate": 1.708963911525029e-05,
      "loss": 0.0438,
      "step": 5000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7756083757781551,
      "eval_f1": 0.7790011542334092,
      "eval_loss": 1.7141435146331787,
      "eval_runtime": 8.0923,
      "eval_samples_per_second": 436.712,
      "eval_steps_per_second": 27.31,
      "step": 5154
    },
    {
      "epoch": 3.2,
      "learning_rate": 1.679860302677532e-05,
      "loss": 0.0331,
      "step": 5500
    },
    {
      "epoch": 3.49,
      "learning_rate": 1.650756693830035e-05,
      "loss": 0.0402,
      "step": 6000
    },
    {
      "epoch": 3.78,
      "learning_rate": 1.621653084982538e-05,
      "loss": 0.0388,
      "step": 6500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7761743067345784,
      "eval_f1": 0.7801920701860957,
      "eval_loss": 1.614416241645813,
      "eval_runtime": 8.1981,
      "eval_samples_per_second": 431.077,
      "eval_steps_per_second": 26.958,
      "step": 6872
    },
    {
      "epoch": 4.07,
      "learning_rate": 1.592549476135041e-05,
      "loss": 0.0458,
      "step": 7000
    },
    {
      "epoch": 4.37,
      "learning_rate": 1.5634458672875437e-05,
      "loss": 0.0282,
      "step": 7500
    },
    {
      "epoch": 4.66,
      "learning_rate": 1.5343422584400468e-05,
      "loss": 0.0348,
      "step": 8000
    },
    {
      "epoch": 4.95,
      "learning_rate": 1.5052386495925495e-05,
      "loss": 0.0319,
      "step": 8500
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7849462365591398,
      "eval_f1": 0.7885570926799784,
      "eval_loss": 1.7275830507278442,
      "eval_runtime": 8.0966,
      "eval_samples_per_second": 436.48,
      "eval_steps_per_second": 27.295,
      "step": 8590
    },
    {
      "epoch": 5.24,
      "learning_rate": 1.4761350407450526e-05,
      "loss": 0.0338,
      "step": 9000
    },
    {
      "epoch": 5.53,
      "learning_rate": 1.4470314318975553e-05,
      "loss": 0.0272,
      "step": 9500
    },
    {
      "epoch": 5.82,
      "learning_rate": 1.4179278230500584e-05,
      "loss": 0.0269,
      "step": 10000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7795698924731183,
      "eval_f1": 0.7833736396261405,
      "eval_loss": 1.8119807243347168,
      "eval_runtime": 8.1074,
      "eval_samples_per_second": 435.896,
      "eval_steps_per_second": 27.259,
      "step": 10308
    },
    {
      "epoch": 6.11,
      "learning_rate": 1.3888242142025613e-05,
      "loss": 0.0267,
      "step": 10500
    },
    {
      "epoch": 6.4,
      "learning_rate": 1.3597206053550642e-05,
      "loss": 0.0191,
      "step": 11000
    },
    {
      "epoch": 6.69,
      "learning_rate": 1.330616996507567e-05,
      "loss": 0.023,
      "step": 11500
    },
    {
      "epoch": 6.98,
      "learning_rate": 1.3015133876600698e-05,
      "loss": 0.0251,
      "step": 12000
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7826825127334465,
      "eval_f1": 0.7861199960648936,
      "eval_loss": 1.7902283668518066,
      "eval_runtime": 8.1049,
      "eval_samples_per_second": 436.031,
      "eval_steps_per_second": 27.267,
      "step": 12026
    },
    {
      "epoch": 7.28,
      "learning_rate": 1.2724097788125728e-05,
      "loss": 0.0239,
      "step": 12500
    },
    {
      "epoch": 7.57,
      "learning_rate": 1.2433061699650757e-05,
      "loss": 0.0261,
      "step": 13000
    },
    {
      "epoch": 7.86,
      "learning_rate": 1.2142025611175788e-05,
      "loss": 0.0194,
      "step": 13500
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7869269949066214,
      "eval_f1": 0.7901736955407076,
      "eval_loss": 1.923709750175476,
      "eval_runtime": 9.3451,
      "eval_samples_per_second": 378.167,
      "eval_steps_per_second": 23.649,
      "step": 13744
    }
  ],
  "max_steps": 34360,
  "num_train_epochs": 20,
  "total_flos": 6256940082766560.0,
  "trial_name": null,
  "trial_params": null
}
|
|