|
{ |
|
"best_metric": 0.27871406078338623, |
|
"best_model_checkpoint": "/content/deberta-v3-small/checkpoint-230", |
|
"epoch": 10.0, |
|
"global_step": 2300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8921568627450981, |
|
"eval_combined_score": 0.9077509052401449, |
|
"eval_f1": 0.9233449477351917, |
|
"eval_loss": 0.27871406078338623, |
|
"eval_runtime": 0.9934, |
|
"eval_samples_per_second": 410.721, |
|
"eval_steps_per_second": 51.34, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.875, |
|
"eval_combined_score": 0.8943527918781726, |
|
"eval_f1": 0.9137055837563453, |
|
"eval_loss": 0.3650500774383545, |
|
"eval_runtime": 1.104, |
|
"eval_samples_per_second": 369.555, |
|
"eval_steps_per_second": 46.194, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8799019607843137, |
|
"eval_combined_score": 0.8989124544290077, |
|
"eval_f1": 0.9179229480737018, |
|
"eval_loss": 0.5237621068954468, |
|
"eval_runtime": 1.014, |
|
"eval_samples_per_second": 402.381, |
|
"eval_steps_per_second": 50.298, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8946078431372549, |
|
"eval_combined_score": 0.9084250788923165, |
|
"eval_f1": 0.9222423146473779, |
|
"eval_loss": 0.471233606338501, |
|
"eval_runtime": 1.021, |
|
"eval_samples_per_second": 399.621, |
|
"eval_steps_per_second": 49.953, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 1.6956521739130433e-05, |
|
"loss": 0.2147, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8946078431372549, |
|
"eval_combined_score": 0.9104257054451284, |
|
"eval_f1": 0.9262435677530019, |
|
"eval_loss": 0.570386528968811, |
|
"eval_runtime": 0.9794, |
|
"eval_samples_per_second": 416.598, |
|
"eval_steps_per_second": 52.075, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8995098039215687, |
|
"eval_combined_score": 0.9139782876501386, |
|
"eval_f1": 0.9284467713787086, |
|
"eval_loss": 0.5697466135025024, |
|
"eval_runtime": 0.9891, |
|
"eval_samples_per_second": 412.495, |
|
"eval_steps_per_second": 51.562, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8921568627450981, |
|
"eval_combined_score": 0.9067927170868348, |
|
"eval_f1": 0.9214285714285715, |
|
"eval_loss": 0.6650992631912231, |
|
"eval_runtime": 1.008, |
|
"eval_samples_per_second": 404.777, |
|
"eval_steps_per_second": 50.597, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8946078431372549, |
|
"eval_combined_score": 0.9092508242234947, |
|
"eval_f1": 0.9238938053097344, |
|
"eval_loss": 0.672588586807251, |
|
"eval_runtime": 1.0378, |
|
"eval_samples_per_second": 393.14, |
|
"eval_steps_per_second": 49.143, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 3.913043478260869e-06, |
|
"loss": 0.0183, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8848039215686274, |
|
"eval_combined_score": 0.9012460938841386, |
|
"eval_f1": 0.9176882661996497, |
|
"eval_loss": 0.7250252366065979, |
|
"eval_runtime": 1.0544, |
|
"eval_samples_per_second": 386.943, |
|
"eval_steps_per_second": 48.368, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8921568627450981, |
|
"eval_combined_score": 0.907209173422019, |
|
"eval_f1": 0.9222614840989399, |
|
"eval_loss": 0.7092880010604858, |
|
"eval_runtime": 1.0253, |
|
"eval_samples_per_second": 397.917, |
|
"eval_steps_per_second": 49.74, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2300, |
|
"total_flos": 1214769315164160.0, |
|
"train_loss": 0.1021328606294549, |
|
"train_runtime": 294.6784, |
|
"train_samples_per_second": 124.475, |
|
"train_steps_per_second": 7.805 |
|
} |
|
], |
|
"max_steps": 2300, |
|
"num_train_epochs": 10, |
|
"total_flos": 1214769315164160.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|