{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.4620718955993652, "learning_rate": 4.75e-05, "loss": 1.2537, "step": 106 }, { "epoch": 1.0, "eval_accuracy": 0.8365739601279842, "eval_f1": 0.0, "eval_loss": 0.7370312809944153, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 1.0987, "eval_samples_per_second": 170.195, "eval_steps_per_second": 2.73, "step": 106 }, { "epoch": 2.0, "grad_norm": 3.4634525775909424, "learning_rate": 4.5e-05, "loss": 0.7093, "step": 212 }, { "epoch": 2.0, "eval_accuracy": 0.8373123307900566, "eval_f1": 0.0056022408963585435, "eval_loss": 0.6298087239265442, "eval_precision": 0.16666666666666666, "eval_recall": 0.002849002849002849, "eval_runtime": 0.9814, "eval_samples_per_second": 190.541, "eval_steps_per_second": 3.057, "step": 212 }, { "epoch": 3.0, "grad_norm": 1.2972607612609863, "learning_rate": 4.25e-05, "loss": 0.6232, "step": 318 }, { "epoch": 3.0, "eval_accuracy": 0.8417425547624908, "eval_f1": 0.032171581769437, "eval_loss": 0.5443252325057983, "eval_precision": 0.2727272727272727, "eval_recall": 0.017094017094017096, "eval_runtime": 1.0287, "eval_samples_per_second": 181.785, "eval_steps_per_second": 2.916, "step": 318 }, { "epoch": 4.0, "grad_norm": 1.2424334287643433, "learning_rate": 4e-05, "loss": 0.5363, "step": 424 }, { "epoch": 4.0, "eval_accuracy": 0.8624169333005168, "eval_f1": 0.14977973568281938, "eval_loss": 0.45594143867492676, "eval_precision": 0.3300970873786408, "eval_recall": 0.09686609686609686, "eval_runtime": 0.9669, "eval_samples_per_second": 193.397, "eval_steps_per_second": 3.103, "step": 424 }, { "epoch": 5.0, "grad_norm": 1.3577224016189575, "learning_rate": 3.7500000000000003e-05, "loss": 0.4591, "step": 530 }, { "epoch": 5.0, "eval_accuracy": 0.8929362539995077, "eval_f1": 0.3604240282685513, "eval_loss": 0.3863191306591034, "eval_precision": 0.4744186046511628, "eval_recall": 0.2905982905982906, "eval_runtime": 0.9477, "eval_samples_per_second": 197.315, "eval_steps_per_second": 3.165, "step": 530 }, { "epoch": 6.0, "grad_norm": 1.2191588878631592, "learning_rate": 3.5e-05, "loss": 0.387, "step": 636 }, { "epoch": 6.0, "eval_accuracy": 0.9190253507260645, "eval_f1": 0.5714285714285715, "eval_loss": 0.32724133133888245, "eval_precision": 0.5789473684210527, "eval_recall": 0.5641025641025641, "eval_runtime": 0.9315, "eval_samples_per_second": 200.744, "eval_steps_per_second": 3.22, "step": 636 }, { "epoch": 7.0, "grad_norm": 1.1846706867218018, "learning_rate": 3.2500000000000004e-05, "loss": 0.3252, "step": 742 }, { "epoch": 7.0, "eval_accuracy": 0.9291164164410534, "eval_f1": 0.6436170212765958, "eval_loss": 0.2810536026954651, "eval_precision": 0.6034912718204489, "eval_recall": 0.6894586894586895, "eval_runtime": 0.9504, "eval_samples_per_second": 196.764, "eval_steps_per_second": 3.157, "step": 742 }, { "epoch": 8.0, "grad_norm": 1.0833159685134888, "learning_rate": 3e-05, "loss": 0.2874, "step": 848 }, { "epoch": 8.0, "eval_accuracy": 0.9313315284272705, "eval_f1": 0.655217965653897, "eval_loss": 0.24546079337596893, "eval_precision": 0.6108374384236454, "eval_recall": 0.7065527065527065, "eval_runtime": 0.9612, "eval_samples_per_second": 194.541, "eval_steps_per_second": 3.121, "step": 848 }, { "epoch": 9.0, "grad_norm": 1.9407267570495605, "learning_rate": 2.7500000000000004e-05, "loss": 0.2588, "step": 954 }, { "epoch": 9.0, "eval_accuracy": 0.9333005168594635, "eval_f1": 0.6761290322580644, "eval_loss": 0.22847984731197357, "eval_precision": 0.6179245283018868, "eval_recall": 0.7464387464387464, "eval_runtime": 0.9716, "eval_samples_per_second": 192.466, "eval_steps_per_second": 3.088, "step": 954 }, { "epoch": 10.0, "grad_norm": 0.7366420030593872, "learning_rate": 2.5e-05, "loss": 0.2393, "step": 1060 }, { "epoch": 10.0, "eval_accuracy": 0.9362539995077529, "eval_f1": 0.6975546975546976, "eval_loss": 0.21532489359378815, "eval_precision": 0.636150234741784, "eval_recall": 0.7720797720797721, "eval_runtime": 0.9655, "eval_samples_per_second": 193.678, "eval_steps_per_second": 3.107, "step": 1060 }, { "epoch": 11.0, "grad_norm": 1.1416951417922974, "learning_rate": 2.25e-05, "loss": 0.224, "step": 1166 }, { "epoch": 11.0, "eval_accuracy": 0.9387152350479941, "eval_f1": 0.7030456852791879, "eval_loss": 0.2062499076128006, "eval_precision": 0.6338672768878718, "eval_recall": 0.7891737891737892, "eval_runtime": 0.9619, "eval_samples_per_second": 194.401, "eval_steps_per_second": 3.119, "step": 1166 }, { "epoch": 12.0, "grad_norm": 1.7414947748184204, "learning_rate": 2e-05, "loss": 0.2137, "step": 1272 }, { "epoch": 12.0, "eval_accuracy": 0.9387152350479941, "eval_f1": 0.7135549872122762, "eval_loss": 0.20024912059307098, "eval_precision": 0.6473317865429234, "eval_recall": 0.7948717948717948, "eval_runtime": 0.9544, "eval_samples_per_second": 195.928, "eval_steps_per_second": 3.143, "step": 1272 }, { "epoch": 13.0, "grad_norm": 1.186489224433899, "learning_rate": 1.75e-05, "loss": 0.2052, "step": 1378 }, { "epoch": 13.0, "eval_accuracy": 0.9424070883583558, "eval_f1": 0.7218628719275549, "eval_loss": 0.18892288208007812, "eval_precision": 0.6611374407582938, "eval_recall": 0.7948717948717948, "eval_runtime": 0.9502, "eval_samples_per_second": 196.793, "eval_steps_per_second": 3.157, "step": 1378 }, { "epoch": 14.0, "grad_norm": 1.4163442850112915, "learning_rate": 1.5e-05, "loss": 0.2039, "step": 1484 }, { "epoch": 14.0, "eval_accuracy": 0.9431454590204282, "eval_f1": 0.7312661498708009, "eval_loss": 0.18623687326908112, "eval_precision": 0.6690307328605201, "eval_recall": 0.8062678062678063, "eval_runtime": 0.9553, "eval_samples_per_second": 195.753, "eval_steps_per_second": 3.14, "step": 1484 }, { "epoch": 15.0, "grad_norm": 1.5289697647094727, "learning_rate": 1.25e-05, "loss": 0.1975, "step": 1590 }, { "epoch": 15.0, "eval_accuracy": 0.9431454590204282, "eval_f1": 0.7319587628865979, "eval_loss": 0.1867983341217041, "eval_precision": 0.668235294117647, "eval_recall": 0.8091168091168092, "eval_runtime": 0.9597, "eval_samples_per_second": 194.853, "eval_steps_per_second": 3.126, "step": 1590 }, { "epoch": 16.0, "grad_norm": 2.371168375015259, "learning_rate": 1e-05, "loss": 0.1936, "step": 1696 }, { "epoch": 16.0, "eval_accuracy": 0.94265321191238, "eval_f1": 0.7321428571428572, "eval_loss": 0.18374690413475037, "eval_precision": 0.6628175519630485, "eval_recall": 0.8176638176638177, "eval_runtime": 0.9485, "eval_samples_per_second": 197.152, "eval_steps_per_second": 3.163, "step": 1696 }, { "epoch": 17.0, "grad_norm": 0.7330523133277893, "learning_rate": 7.5e-06, "loss": 0.1908, "step": 1802 }, { "epoch": 17.0, "eval_accuracy": 0.94265321191238, "eval_f1": 0.7305236270753512, "eval_loss": 0.18250302970409393, "eval_precision": 0.6620370370370371, "eval_recall": 0.8148148148148148, "eval_runtime": 0.9449, "eval_samples_per_second": 197.907, "eval_steps_per_second": 3.175, "step": 1802 }, { "epoch": 18.0, "grad_norm": 1.4619590044021606, "learning_rate": 5e-06, "loss": 0.1885, "step": 1908 }, { "epoch": 18.0, "eval_accuracy": 0.9431454590204282, "eval_f1": 0.7270408163265305, "eval_loss": 0.1805543154478073, "eval_precision": 0.6581986143187067, "eval_recall": 0.811965811965812, "eval_runtime": 0.95, "eval_samples_per_second": 196.84, "eval_steps_per_second": 3.158, "step": 1908 }, { "epoch": 19.0, "grad_norm": 0.7237643003463745, "learning_rate": 2.5e-06, "loss": 0.1877, "step": 2014 }, { "epoch": 19.0, "eval_accuracy": 0.9431454590204282, "eval_f1": 0.7247119078104994, "eval_loss": 0.17826727032661438, "eval_precision": 0.6581395348837209, "eval_recall": 0.8062678062678063, "eval_runtime": 0.9528, "eval_samples_per_second": 196.268, "eval_steps_per_second": 3.149, "step": 2014 }, { "epoch": 20.0, "grad_norm": 2.3640408515930176, "learning_rate": 0.0, "loss": 0.1858, "step": 2120 }, { "epoch": 20.0, "eval_accuracy": 0.9433915825744523, "eval_f1": 0.7279693486590039, "eval_loss": 0.1783868670463562, "eval_precision": 0.6597222222222222, "eval_recall": 0.811965811965812, "eval_runtime": 0.9505, "eval_samples_per_second": 196.741, "eval_steps_per_second": 3.156, "step": 2120 }, { "epoch": 20.0, "step": 2120, "total_flos": 907028676246000.0, "train_loss": 0.35348991987840184, "train_runtime": 247.3863, "train_samples_per_second": 136.467, "train_steps_per_second": 8.57 } ], "logging_steps": 500, "max_steps": 2120, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 907028676246000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }