{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.668501853942871, "learning_rate": 4.75e-05, "loss": 1.2459, "step": 106 }, { "epoch": 1.0, "eval_accuracy": 0.8352737181075056, "eval_f1": 0.0, "eval_loss": 0.7375997304916382, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 0.9353, "eval_samples_per_second": 199.938, "eval_steps_per_second": 3.208, "step": 106 }, { "epoch": 2.0, "grad_norm": 1.2960220575332642, "learning_rate": 4.5e-05, "loss": 0.7125, "step": 212 }, { "epoch": 2.0, "eval_accuracy": 0.8362645528858063, "eval_f1": 0.005681818181818181, "eval_loss": 0.6395161151885986, "eval_precision": 0.16666666666666666, "eval_recall": 0.002890173410404624, "eval_runtime": 0.9886, "eval_samples_per_second": 189.166, "eval_steps_per_second": 3.035, "step": 212 }, { "epoch": 3.0, "grad_norm": 1.4184564352035522, "learning_rate": 4.25e-05, "loss": 0.6362, "step": 318 }, { "epoch": 3.0, "eval_accuracy": 0.839980183304434, "eval_f1": 0.02168021680216802, "eval_loss": 0.5518138408660889, "eval_precision": 0.17391304347826086, "eval_recall": 0.011560693641618497, "eval_runtime": 1.0186, "eval_samples_per_second": 183.577, "eval_steps_per_second": 2.945, "step": 318 }, { "epoch": 4.0, "grad_norm": 2.0016016960144043, "learning_rate": 4e-05, "loss": 0.5564, "step": 424 }, { "epoch": 4.0, "eval_accuracy": 0.8578152093138469, "eval_f1": 0.11389521640091117, "eval_loss": 0.46721315383911133, "eval_precision": 0.26881720430107525, "eval_recall": 0.07225433526011561, "eval_runtime": 0.9821, "eval_samples_per_second": 190.403, "eval_steps_per_second": 3.055, "step": 424 }, { "epoch": 5.0, "grad_norm": 1.4191057682037354, "learning_rate": 3.7500000000000003e-05, "loss": 0.4714, "step": 530 }, { "epoch": 5.0, "eval_accuracy": 0.8880356700520188, "eval_f1": 0.3236363636363636, "eval_loss": 0.3911801874637604, "eval_precision": 0.4362745098039216, "eval_recall": 0.25722543352601157, "eval_runtime": 0.9404, "eval_samples_per_second": 198.861, "eval_steps_per_second": 3.19, "step": 530 }, { "epoch": 6.0, "grad_norm": 1.1648097038269043, "learning_rate": 3.5e-05, "loss": 0.3978, "step": 636 }, { "epoch": 6.0, "eval_accuracy": 0.9135496655932623, "eval_f1": 0.5105740181268881, "eval_loss": 0.324018269777298, "eval_precision": 0.5348101265822784, "eval_recall": 0.4884393063583815, "eval_runtime": 0.9314, "eval_samples_per_second": 200.778, "eval_steps_per_second": 3.221, "step": 636 }, { "epoch": 7.0, "grad_norm": 0.7792711853981018, "learning_rate": 3.2500000000000004e-05, "loss": 0.3365, "step": 742 }, { "epoch": 7.0, "eval_accuracy": 0.924201139459995, "eval_f1": 0.6122448979591837, "eval_loss": 0.28386977314949036, "eval_precision": 0.5784061696658098, "eval_recall": 0.6502890173410405, "eval_runtime": 0.9336, "eval_samples_per_second": 200.302, "eval_steps_per_second": 3.213, "step": 742 }, { "epoch": 8.0, "grad_norm": 1.4722111225128174, "learning_rate": 3e-05, "loss": 0.294, "step": 848 }, { "epoch": 8.0, "eval_accuracy": 0.9318801089918256, "eval_f1": 0.6657789613848202, "eval_loss": 0.2507442831993103, "eval_precision": 0.6172839506172839, "eval_recall": 0.7225433526011561, "eval_runtime": 0.9589, "eval_samples_per_second": 195.017, "eval_steps_per_second": 3.129, "step": 848 }, { "epoch": 9.0, "grad_norm": 1.440127968788147, "learning_rate": 2.7500000000000004e-05, "loss": 0.2677, "step": 954 }, { "epoch": 9.0, "eval_accuracy": 0.9355957394104533, "eval_f1": 0.6973684210526315, "eval_loss": 0.23195113241672516, "eval_precision": 0.6400966183574879, "eval_recall": 0.7658959537572254, "eval_runtime": 0.9704, "eval_samples_per_second": 192.695, "eval_steps_per_second": 3.091, "step": 954 }, { "epoch": 10.0, "grad_norm": 1.053550124168396, "learning_rate": 2.5e-05, "loss": 0.2457, "step": 1060 }, { "epoch": 10.0, "eval_accuracy": 0.939311369829081, "eval_f1": 0.716180371352785, "eval_loss": 0.2109125703573227, "eval_precision": 0.6617647058823529, "eval_recall": 0.7803468208092486, "eval_runtime": 0.9655, "eval_samples_per_second": 193.689, "eval_steps_per_second": 3.107, "step": 1060 }, { "epoch": 11.0, "grad_norm": 1.8145582675933838, "learning_rate": 2.25e-05, "loss": 0.2339, "step": 1166 }, { "epoch": 11.0, "eval_accuracy": 0.9405499133019569, "eval_f1": 0.7239101717305151, "eval_loss": 0.20224529504776, "eval_precision": 0.6666666666666666, "eval_recall": 0.791907514450867, "eval_runtime": 0.9648, "eval_samples_per_second": 193.823, "eval_steps_per_second": 3.109, "step": 1166 }, { "epoch": 12.0, "grad_norm": 1.759846806526184, "learning_rate": 2e-05, "loss": 0.2215, "step": 1272 }, { "epoch": 12.0, "eval_accuracy": 0.9425315828585583, "eval_f1": 0.7450980392156863, "eval_loss": 0.1986953467130661, "eval_precision": 0.6801909307875895, "eval_recall": 0.8236994219653179, "eval_runtime": 1.023, "eval_samples_per_second": 182.798, "eval_steps_per_second": 2.933, "step": 1272 }, { "epoch": 13.0, "grad_norm": 0.7489528656005859, "learning_rate": 1.75e-05, "loss": 0.2125, "step": 1378 }, { "epoch": 13.0, "eval_accuracy": 0.9432747089422838, "eval_f1": 0.7408376963350786, "eval_loss": 0.18985998630523682, "eval_precision": 0.6770334928229665, "eval_recall": 0.8179190751445087, "eval_runtime": 0.9458, "eval_samples_per_second": 197.715, "eval_steps_per_second": 3.172, "step": 1378 }, { "epoch": 14.0, "grad_norm": 1.942519187927246, "learning_rate": 1.5e-05, "loss": 0.2085, "step": 1484 }, { "epoch": 14.0, "eval_accuracy": 0.9437701263314342, "eval_f1": 0.7463863337713535, "eval_loss": 0.1853644996881485, "eval_precision": 0.6843373493975904, "eval_recall": 0.8208092485549133, "eval_runtime": 0.9518, "eval_samples_per_second": 196.479, "eval_steps_per_second": 3.152, "step": 1484 }, { "epoch": 15.0, "grad_norm": 0.6467780470848083, "learning_rate": 1.25e-05, "loss": 0.2002, "step": 1590 }, { "epoch": 15.0, "eval_accuracy": 0.9459995045826108, "eval_f1": 0.7519788918205805, "eval_loss": 0.17970043420791626, "eval_precision": 0.691747572815534, "eval_recall": 0.8236994219653179, "eval_runtime": 0.9531, "eval_samples_per_second": 196.206, "eval_steps_per_second": 3.148, "step": 1590 }, { "epoch": 16.0, "grad_norm": 1.3061821460723877, "learning_rate": 1e-05, "loss": 0.2, "step": 1696 }, { "epoch": 16.0, "eval_accuracy": 0.9452563784988853, "eval_f1": 0.7490144546649146, "eval_loss": 0.1779414862394333, "eval_precision": 0.6867469879518072, "eval_recall": 0.8236994219653179, "eval_runtime": 0.9553, "eval_samples_per_second": 195.76, "eval_steps_per_second": 3.141, "step": 1696 }, { "epoch": 17.0, "grad_norm": 0.871351420879364, "learning_rate": 7.5e-06, "loss": 0.1929, "step": 1802 }, { "epoch": 17.0, "eval_accuracy": 0.9450086698043101, "eval_f1": 0.7486910994764396, "eval_loss": 0.177422896027565, "eval_precision": 0.6842105263157895, "eval_recall": 0.8265895953757225, "eval_runtime": 1.0338, "eval_samples_per_second": 180.886, "eval_steps_per_second": 2.902, "step": 1802 }, { "epoch": 18.0, "grad_norm": 1.4120484590530396, "learning_rate": 5e-06, "loss": 0.1932, "step": 1908 }, { "epoch": 18.0, "eval_accuracy": 0.9457517958880357, "eval_f1": 0.7506561679790026, "eval_loss": 0.1760500967502594, "eval_precision": 0.6875, "eval_recall": 0.8265895953757225, "eval_runtime": 0.9548, "eval_samples_per_second": 195.862, "eval_steps_per_second": 3.142, "step": 1908 }, { "epoch": 19.0, "grad_norm": 0.9899640679359436, "learning_rate": 2.5e-06, "loss": 0.1916, "step": 2014 }, { "epoch": 19.0, "eval_accuracy": 0.9464949219717612, "eval_f1": 0.7516425755584757, "eval_loss": 0.17468006908893585, "eval_precision": 0.689156626506024, "eval_recall": 0.8265895953757225, "eval_runtime": 0.9571, "eval_samples_per_second": 195.387, "eval_steps_per_second": 3.135, "step": 2014 }, { "epoch": 20.0, "grad_norm": 1.6105211973190308, "learning_rate": 0.0, "loss": 0.1887, "step": 2120 }, { "epoch": 20.0, "eval_accuracy": 0.9464949219717612, "eval_f1": 0.7516425755584757, "eval_loss": 0.17423608899116516, "eval_precision": 0.689156626506024, "eval_recall": 0.8265895953757225, "eval_runtime": 0.9545, "eval_samples_per_second": 195.923, "eval_steps_per_second": 3.143, "step": 2120 }, { "epoch": 20.0, "step": 2120, "total_flos": 910226579721144.0, "train_loss": 0.36036560220538444, "train_runtime": 247.3916, "train_samples_per_second": 136.383, "train_steps_per_second": 8.569 } ], "logging_steps": 500, "max_steps": 2120, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 910226579721144.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }