|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.668501853942871, |
|
"learning_rate": 4.75e-05, |
|
"loss": 1.2459, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8352737181075056, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.7375997304916382, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 0.9353, |
|
"eval_samples_per_second": 199.938, |
|
"eval_steps_per_second": 3.208, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.2960220575332642, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.7125, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8362645528858063, |
|
"eval_f1": 0.005681818181818181, |
|
"eval_loss": 0.6395161151885986, |
|
"eval_precision": 0.16666666666666666, |
|
"eval_recall": 0.002890173410404624, |
|
"eval_runtime": 0.9886, |
|
"eval_samples_per_second": 189.166, |
|
"eval_steps_per_second": 3.035, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.4184564352035522, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.6362, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.839980183304434, |
|
"eval_f1": 0.02168021680216802, |
|
"eval_loss": 0.5518138408660889, |
|
"eval_precision": 0.17391304347826086, |
|
"eval_recall": 0.011560693641618497, |
|
"eval_runtime": 1.0186, |
|
"eval_samples_per_second": 183.577, |
|
"eval_steps_per_second": 2.945, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.0016016960144043, |
|
"learning_rate": 4e-05, |
|
"loss": 0.5564, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8578152093138469, |
|
"eval_f1": 0.11389521640091117, |
|
"eval_loss": 0.46721315383911133, |
|
"eval_precision": 0.26881720430107525, |
|
"eval_recall": 0.07225433526011561, |
|
"eval_runtime": 0.9821, |
|
"eval_samples_per_second": 190.403, |
|
"eval_steps_per_second": 3.055, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.4191057682037354, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4714, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8880356700520188, |
|
"eval_f1": 0.3236363636363636, |
|
"eval_loss": 0.3911801874637604, |
|
"eval_precision": 0.4362745098039216, |
|
"eval_recall": 0.25722543352601157, |
|
"eval_runtime": 0.9404, |
|
"eval_samples_per_second": 198.861, |
|
"eval_steps_per_second": 3.19, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.1648097038269043, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3978, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9135496655932623, |
|
"eval_f1": 0.5105740181268881, |
|
"eval_loss": 0.324018269777298, |
|
"eval_precision": 0.5348101265822784, |
|
"eval_recall": 0.4884393063583815, |
|
"eval_runtime": 0.9314, |
|
"eval_samples_per_second": 200.778, |
|
"eval_steps_per_second": 3.221, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.7792711853981018, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3365, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.924201139459995, |
|
"eval_f1": 0.6122448979591837, |
|
"eval_loss": 0.28386977314949036, |
|
"eval_precision": 0.5784061696658098, |
|
"eval_recall": 0.6502890173410405, |
|
"eval_runtime": 0.9336, |
|
"eval_samples_per_second": 200.302, |
|
"eval_steps_per_second": 3.213, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.4722111225128174, |
|
"learning_rate": 3e-05, |
|
"loss": 0.294, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9318801089918256, |
|
"eval_f1": 0.6657789613848202, |
|
"eval_loss": 0.2507442831993103, |
|
"eval_precision": 0.6172839506172839, |
|
"eval_recall": 0.7225433526011561, |
|
"eval_runtime": 0.9589, |
|
"eval_samples_per_second": 195.017, |
|
"eval_steps_per_second": 3.129, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.440127968788147, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2677, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9355957394104533, |
|
"eval_f1": 0.6973684210526315, |
|
"eval_loss": 0.23195113241672516, |
|
"eval_precision": 0.6400966183574879, |
|
"eval_recall": 0.7658959537572254, |
|
"eval_runtime": 0.9704, |
|
"eval_samples_per_second": 192.695, |
|
"eval_steps_per_second": 3.091, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.053550124168396, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2457, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.939311369829081, |
|
"eval_f1": 0.716180371352785, |
|
"eval_loss": 0.2109125703573227, |
|
"eval_precision": 0.6617647058823529, |
|
"eval_recall": 0.7803468208092486, |
|
"eval_runtime": 0.9655, |
|
"eval_samples_per_second": 193.689, |
|
"eval_steps_per_second": 3.107, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.8145582675933838, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2339, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9405499133019569, |
|
"eval_f1": 0.7239101717305151, |
|
"eval_loss": 0.20224529504776, |
|
"eval_precision": 0.6666666666666666, |
|
"eval_recall": 0.791907514450867, |
|
"eval_runtime": 0.9648, |
|
"eval_samples_per_second": 193.823, |
|
"eval_steps_per_second": 3.109, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.759846806526184, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2215, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9425315828585583, |
|
"eval_f1": 0.7450980392156863, |
|
"eval_loss": 0.1986953467130661, |
|
"eval_precision": 0.6801909307875895, |
|
"eval_recall": 0.8236994219653179, |
|
"eval_runtime": 1.023, |
|
"eval_samples_per_second": 182.798, |
|
"eval_steps_per_second": 2.933, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.7489528656005859, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2125, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9432747089422838, |
|
"eval_f1": 0.7408376963350786, |
|
"eval_loss": 0.18985998630523682, |
|
"eval_precision": 0.6770334928229665, |
|
"eval_recall": 0.8179190751445087, |
|
"eval_runtime": 0.9458, |
|
"eval_samples_per_second": 197.715, |
|
"eval_steps_per_second": 3.172, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 1.942519187927246, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2085, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9437701263314342, |
|
"eval_f1": 0.7463863337713535, |
|
"eval_loss": 0.1853644996881485, |
|
"eval_precision": 0.6843373493975904, |
|
"eval_recall": 0.8208092485549133, |
|
"eval_runtime": 0.9518, |
|
"eval_samples_per_second": 196.479, |
|
"eval_steps_per_second": 3.152, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.6467780470848083, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2002, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9459995045826108, |
|
"eval_f1": 0.7519788918205805, |
|
"eval_loss": 0.17970043420791626, |
|
"eval_precision": 0.691747572815534, |
|
"eval_recall": 0.8236994219653179, |
|
"eval_runtime": 0.9531, |
|
"eval_samples_per_second": 196.206, |
|
"eval_steps_per_second": 3.148, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.3061821460723877, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9452563784988853, |
|
"eval_f1": 0.7490144546649146, |
|
"eval_loss": 0.1779414862394333, |
|
"eval_precision": 0.6867469879518072, |
|
"eval_recall": 0.8236994219653179, |
|
"eval_runtime": 0.9553, |
|
"eval_samples_per_second": 195.76, |
|
"eval_steps_per_second": 3.141, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.871351420879364, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1929, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9450086698043101, |
|
"eval_f1": 0.7486910994764396, |
|
"eval_loss": 0.177422896027565, |
|
"eval_precision": 0.6842105263157895, |
|
"eval_recall": 0.8265895953757225, |
|
"eval_runtime": 1.0338, |
|
"eval_samples_per_second": 180.886, |
|
"eval_steps_per_second": 2.902, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.4120484590530396, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1932, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9457517958880357, |
|
"eval_f1": 0.7506561679790026, |
|
"eval_loss": 0.1760500967502594, |
|
"eval_precision": 0.6875, |
|
"eval_recall": 0.8265895953757225, |
|
"eval_runtime": 0.9548, |
|
"eval_samples_per_second": 195.862, |
|
"eval_steps_per_second": 3.142, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.9899640679359436, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1916, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9464949219717612, |
|
"eval_f1": 0.7516425755584757, |
|
"eval_loss": 0.17468006908893585, |
|
"eval_precision": 0.689156626506024, |
|
"eval_recall": 0.8265895953757225, |
|
"eval_runtime": 0.9571, |
|
"eval_samples_per_second": 195.387, |
|
"eval_steps_per_second": 3.135, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.6105211973190308, |
|
"learning_rate": 0.0, |
|
"loss": 0.1887, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9464949219717612, |
|
"eval_f1": 0.7516425755584757, |
|
"eval_loss": 0.17423608899116516, |
|
"eval_precision": 0.689156626506024, |
|
"eval_recall": 0.8265895953757225, |
|
"eval_runtime": 0.9545, |
|
"eval_samples_per_second": 195.923, |
|
"eval_steps_per_second": 3.143, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2120, |
|
"total_flos": 910226579721144.0, |
|
"train_loss": 0.36036560220538444, |
|
"train_runtime": 247.3916, |
|
"train_samples_per_second": 136.383, |
|
"train_steps_per_second": 8.569 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 910226579721144.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|