|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.1851325035095215, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5657, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.6528788358114521, |
|
"eval_loss": 0.5160595774650574, |
|
"eval_precision": 0.6616220346866901, |
|
"eval_recall": 0.6474358974358975, |
|
"eval_runtime": 5.1403, |
|
"eval_samples_per_second": 77.623, |
|
"eval_steps_per_second": 9.727, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.9789302349090576, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5088, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7393483709273183, |
|
"eval_f1": 0.6971094890510949, |
|
"eval_loss": 0.49126291275024414, |
|
"eval_precision": 0.6917293233082706, |
|
"eval_recall": 0.7055828332424077, |
|
"eval_runtime": 5.0447, |
|
"eval_samples_per_second": 79.093, |
|
"eval_steps_per_second": 9.911, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.5484442710876465, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4682, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7844611528822055, |
|
"eval_f1": 0.7412841546534773, |
|
"eval_loss": 0.44238051772117615, |
|
"eval_precision": 0.7401260504201681, |
|
"eval_recall": 0.7424986361156574, |
|
"eval_runtime": 5.0582, |
|
"eval_samples_per_second": 78.882, |
|
"eval_steps_per_second": 9.885, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.417043685913086, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4114, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8095238095238095, |
|
"eval_f1": 0.7702309510820149, |
|
"eval_loss": 0.39798638224601746, |
|
"eval_precision": 0.7702309510820149, |
|
"eval_recall": 0.7702309510820149, |
|
"eval_runtime": 5.0628, |
|
"eval_samples_per_second": 78.81, |
|
"eval_steps_per_second": 9.876, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.0948269367218018, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3862, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8145363408521303, |
|
"eval_f1": 0.7889190734915642, |
|
"eval_loss": 0.3889566659927368, |
|
"eval_precision": 0.7782998251748252, |
|
"eval_recall": 0.8087834151663938, |
|
"eval_runtime": 5.0983, |
|
"eval_samples_per_second": 78.262, |
|
"eval_steps_per_second": 9.807, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.5050086975097656, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3512, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8127815315315315, |
|
"eval_loss": 0.35834890604019165, |
|
"eval_precision": 0.8244897959183674, |
|
"eval_recall": 0.8036006546644845, |
|
"eval_runtime": 5.0754, |
|
"eval_samples_per_second": 78.614, |
|
"eval_steps_per_second": 9.851, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.839920163154602, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3428, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8229427559286084, |
|
"eval_loss": 0.34960630536079407, |
|
"eval_precision": 0.8206541218637993, |
|
"eval_recall": 0.8253773413347881, |
|
"eval_runtime": 5.0545, |
|
"eval_samples_per_second": 78.939, |
|
"eval_steps_per_second": 9.892, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.5413811206817627, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3254, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8127815315315315, |
|
"eval_loss": 0.3425041735172272, |
|
"eval_precision": 0.8244897959183674, |
|
"eval_recall": 0.8036006546644845, |
|
"eval_runtime": 5.1016, |
|
"eval_samples_per_second": 78.21, |
|
"eval_steps_per_second": 9.801, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.189133644104004, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3226, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8245369048813042, |
|
"eval_loss": 0.338846355676651, |
|
"eval_precision": 0.8310003145643283, |
|
"eval_recall": 0.8189216221131115, |
|
"eval_runtime": 5.0603, |
|
"eval_samples_per_second": 78.849, |
|
"eval_steps_per_second": 9.881, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 6.058858394622803, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3063, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8315033783783784, |
|
"eval_loss": 0.3375921845436096, |
|
"eval_precision": 0.8438775510204082, |
|
"eval_recall": 0.8217403164211674, |
|
"eval_runtime": 5.0475, |
|
"eval_samples_per_second": 79.049, |
|
"eval_steps_per_second": 9.906, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.5873308181762695, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2939, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8350789627607721, |
|
"eval_loss": 0.33186405897140503, |
|
"eval_precision": 0.8463358876939919, |
|
"eval_recall": 0.8260138206946717, |
|
"eval_runtime": 5.0675, |
|
"eval_samples_per_second": 78.738, |
|
"eval_steps_per_second": 9.867, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 9.944389343261719, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2838, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8228567054500919, |
|
"eval_loss": 0.33234164118766785, |
|
"eval_precision": 0.8263351692555232, |
|
"eval_recall": 0.8196490270958356, |
|
"eval_runtime": 5.0657, |
|
"eval_samples_per_second": 78.765, |
|
"eval_steps_per_second": 9.87, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 3.0918514728546143, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2916, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8295950648528947, |
|
"eval_loss": 0.3283240497112274, |
|
"eval_precision": 0.8472157618446409, |
|
"eval_recall": 0.816739407164939, |
|
"eval_runtime": 5.0481, |
|
"eval_samples_per_second": 79.039, |
|
"eval_steps_per_second": 9.905, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 8.040557861328125, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2826, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8350789627607721, |
|
"eval_loss": 0.32443901896476746, |
|
"eval_precision": 0.8463358876939919, |
|
"eval_recall": 0.8260138206946717, |
|
"eval_runtime": 5.0572, |
|
"eval_samples_per_second": 78.897, |
|
"eval_steps_per_second": 9.887, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.8433086276054382, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2739, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8411818738518064, |
|
"eval_loss": 0.32310473918914795, |
|
"eval_precision": 0.8448542607834644, |
|
"eval_recall": 0.8377886888525186, |
|
"eval_runtime": 5.047, |
|
"eval_samples_per_second": 79.056, |
|
"eval_steps_per_second": 9.907, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 7.48613166809082, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2674, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8411818738518064, |
|
"eval_loss": 0.322089284658432, |
|
"eval_precision": 0.8448542607834644, |
|
"eval_recall": 0.8377886888525186, |
|
"eval_runtime": 5.0922, |
|
"eval_samples_per_second": 78.356, |
|
"eval_steps_per_second": 9.819, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 6.076572895050049, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2648, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8413023981282901, |
|
"eval_loss": 0.3192753493785858, |
|
"eval_precision": 0.8527593534677056, |
|
"eval_recall": 0.8320603746135662, |
|
"eval_runtime": 5.0505, |
|
"eval_samples_per_second": 79.002, |
|
"eval_steps_per_second": 9.9, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.7677204608917236, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2687, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8403508771929824, |
|
"eval_loss": 0.31721195578575134, |
|
"eval_precision": 0.8460491741741742, |
|
"eval_recall": 0.8352882342244045, |
|
"eval_runtime": 5.0513, |
|
"eval_samples_per_second": 78.99, |
|
"eval_steps_per_second": 9.899, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.890503406524658, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.264, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8448388501742161, |
|
"eval_loss": 0.31702518463134766, |
|
"eval_precision": 0.8551721930610677, |
|
"eval_recall": 0.8363338788870704, |
|
"eval_runtime": 5.0604, |
|
"eval_samples_per_second": 78.847, |
|
"eval_steps_per_second": 9.881, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 8.532156944274902, |
|
"learning_rate": 0.0, |
|
"loss": 0.2637, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8395012067578439, |
|
"eval_loss": 0.3147731125354767, |
|
"eval_precision": 0.8473584308763049, |
|
"eval_recall": 0.8327877795962902, |
|
"eval_runtime": 5.0483, |
|
"eval_samples_per_second": 79.036, |
|
"eval_steps_per_second": 9.904, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7609911792720000.0, |
|
"train_loss": 0.33715896919125415, |
|
"train_runtime": 1952.8505, |
|
"train_samples_per_second": 37.258, |
|
"train_steps_per_second": 1.249 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7609911792720000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|