{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.7648299932479858, "learning_rate": 4.75e-05, "loss": 1.1483, "step": 106 }, { "epoch": 1.0, "eval_accuracy": 0.8448836627470603, "eval_f1": 0.0, "eval_loss": 0.6899715065956116, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 0.8894, "eval_samples_per_second": 210.248, "eval_steps_per_second": 3.373, "step": 106 }, { "epoch": 2.0, "grad_norm": 1.372132420539856, "learning_rate": 4.5e-05, "loss": 0.6875, "step": 212 }, { "epoch": 2.0, "eval_accuracy": 0.8463847885914436, "eval_f1": 0.0, "eval_loss": 0.5737118721008301, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 0.926, "eval_samples_per_second": 201.946, "eval_steps_per_second": 3.24, "step": 212 }, { "epoch": 3.0, "grad_norm": 1.2157526016235352, "learning_rate": 4.25e-05, "loss": 0.5874, "step": 318 }, { "epoch": 3.0, "eval_accuracy": 0.8633975481611208, "eval_f1": 0.10071942446043167, "eval_loss": 0.4661254584789276, "eval_precision": 0.2692307692307692, "eval_recall": 0.061946902654867256, "eval_runtime": 0.9773, "eval_samples_per_second": 191.347, "eval_steps_per_second": 3.07, "step": 318 }, { "epoch": 4.0, "grad_norm": 2.093184232711792, "learning_rate": 4e-05, "loss": 0.4729, "step": 424 }, { "epoch": 4.0, "eval_accuracy": 0.898173630222667, "eval_f1": 0.37722419928825623, "eval_loss": 0.3599094748497009, "eval_precision": 0.47533632286995514, "eval_recall": 0.31268436578171094, "eval_runtime": 0.9352, "eval_samples_per_second": 199.961, "eval_steps_per_second": 3.208, "step": 424 }, { "epoch": 5.0, "grad_norm": 4.716677665710449, "learning_rate": 3.7500000000000003e-05, "loss": 0.3692, "step": 530 }, { "epoch": 5.0, "eval_accuracy": 0.9246935201401051, "eval_f1": 0.5917496443812232, "eval_loss": 0.29404327273368835, "eval_precision": 0.5714285714285714, "eval_recall": 0.6135693215339233, "eval_runtime": 0.899, "eval_samples_per_second": 208.018, "eval_steps_per_second": 3.337, "step": 530 }, { "epoch": 6.0, "grad_norm": 1.1444728374481201, "learning_rate": 3.5e-05, "loss": 0.3058, "step": 636 }, { "epoch": 6.0, "eval_accuracy": 0.9334500875656743, "eval_f1": 0.6621621621621622, "eval_loss": 0.2527407705783844, "eval_precision": 0.6109725685785536, "eval_recall": 0.7227138643067846, "eval_runtime": 0.8914, "eval_samples_per_second": 209.792, "eval_steps_per_second": 3.366, "step": 636 }, { "epoch": 7.0, "grad_norm": 0.998134434223175, "learning_rate": 3.2500000000000004e-05, "loss": 0.2636, "step": 742 }, { "epoch": 7.0, "eval_accuracy": 0.937453089817363, "eval_f1": 0.6954177897574124, "eval_loss": 0.22462092339992523, "eval_precision": 0.6401985111662531, "eval_recall": 0.7610619469026548, "eval_runtime": 0.9047, "eval_samples_per_second": 206.701, "eval_steps_per_second": 3.316, "step": 742 }, { "epoch": 8.0, "grad_norm": 1.480947732925415, "learning_rate": 3e-05, "loss": 0.24, "step": 848 }, { "epoch": 8.0, "eval_accuracy": 0.9417062797097824, "eval_f1": 0.7241379310344828, "eval_loss": 0.20909550786018372, "eval_precision": 0.6578313253012048, "eval_recall": 0.8053097345132744, "eval_runtime": 0.9189, "eval_samples_per_second": 203.503, "eval_steps_per_second": 3.265, "step": 848 }, { "epoch": 9.0, "grad_norm": 1.5677202939987183, "learning_rate": 2.7500000000000004e-05, "loss": 0.2228, "step": 954 }, { "epoch": 9.0, "eval_accuracy": 0.940205153865399, "eval_f1": 0.716883116883117, "eval_loss": 0.1985715925693512, "eval_precision": 0.6403712296983759, "eval_recall": 0.8141592920353983, "eval_runtime": 0.9225, "eval_samples_per_second": 202.716, "eval_steps_per_second": 3.252, "step": 954 }, { "epoch": 10.0, "grad_norm": 4.242694854736328, "learning_rate": 2.5e-05, "loss": 0.2105, "step": 1060 }, { "epoch": 10.0, "eval_accuracy": 0.9417062797097824, "eval_f1": 0.733245729303548, "eval_loss": 0.1821284145116806, "eval_precision": 0.6611374407582938, "eval_recall": 0.8230088495575221, "eval_runtime": 0.925, "eval_samples_per_second": 202.159, "eval_steps_per_second": 3.243, "step": 1060 }, { "epoch": 11.0, "grad_norm": 1.5337995290756226, "learning_rate": 2.25e-05, "loss": 0.2007, "step": 1166 }, { "epoch": 11.0, "eval_accuracy": 0.9432074055541656, "eval_f1": 0.7394736842105263, "eval_loss": 0.17939399182796478, "eval_precision": 0.667458432304038, "eval_recall": 0.8289085545722714, "eval_runtime": 0.9211, "eval_samples_per_second": 203.028, "eval_steps_per_second": 3.257, "step": 1166 }, { "epoch": 12.0, "grad_norm": 2.7884016036987305, "learning_rate": 2e-05, "loss": 0.195, "step": 1272 }, { "epoch": 12.0, "eval_accuracy": 0.9429572179134351, "eval_f1": 0.7392996108949417, "eval_loss": 0.18079817295074463, "eval_precision": 0.6597222222222222, "eval_recall": 0.8407079646017699, "eval_runtime": 0.9116, "eval_samples_per_second": 205.139, "eval_steps_per_second": 3.291, "step": 1272 }, { "epoch": 13.0, "grad_norm": 1.4956753253936768, "learning_rate": 1.75e-05, "loss": 0.19, "step": 1378 }, { "epoch": 13.0, "eval_accuracy": 0.9459594696022017, "eval_f1": 0.7463479415670652, "eval_loss": 0.16896921396255493, "eval_precision": 0.678743961352657, "eval_recall": 0.8289085545722714, "eval_runtime": 0.9045, "eval_samples_per_second": 206.741, "eval_steps_per_second": 3.317, "step": 1378 }, { "epoch": 14.0, "grad_norm": 1.4634425640106201, "learning_rate": 1.5e-05, "loss": 0.1835, "step": 1484 }, { "epoch": 14.0, "eval_accuracy": 0.9477107830873155, "eval_f1": 0.751336898395722, "eval_loss": 0.16314250230789185, "eval_precision": 0.687041564792176, "eval_recall": 0.8289085545722714, "eval_runtime": 0.9091, "eval_samples_per_second": 205.694, "eval_steps_per_second": 3.3, "step": 1484 }, { "epoch": 15.0, "grad_norm": 2.0593953132629395, "learning_rate": 1.25e-05, "loss": 0.1821, "step": 1590 }, { "epoch": 15.0, "eval_accuracy": 0.9472104078058544, "eval_f1": 0.753968253968254, "eval_loss": 0.16711552441120148, "eval_precision": 0.6834532374100719, "eval_recall": 0.8407079646017699, "eval_runtime": 0.9129, "eval_samples_per_second": 204.846, "eval_steps_per_second": 3.286, "step": 1590 }, { "epoch": 16.0, "grad_norm": 1.1103074550628662, "learning_rate": 1e-05, "loss": 0.1774, "step": 1696 }, { "epoch": 16.0, "eval_accuracy": 0.9472104078058544, "eval_f1": 0.7647831800262812, "eval_loss": 0.16675202548503876, "eval_precision": 0.6895734597156398, "eval_recall": 0.8584070796460177, "eval_runtime": 0.913, "eval_samples_per_second": 204.819, "eval_steps_per_second": 3.286, "step": 1696 }, { "epoch": 17.0, "grad_norm": 2.5177829265594482, "learning_rate": 7.5e-06, "loss": 0.1764, "step": 1802 }, { "epoch": 17.0, "eval_accuracy": 0.9477107830873155, "eval_f1": 0.7602649006622517, "eval_loss": 0.16347847878932953, "eval_precision": 0.6899038461538461, "eval_recall": 0.8466076696165191, "eval_runtime": 0.9243, "eval_samples_per_second": 202.326, "eval_steps_per_second": 3.246, "step": 1802 }, { "epoch": 18.0, "grad_norm": 0.8699701428413391, "learning_rate": 5e-06, "loss": 0.1729, "step": 1908 }, { "epoch": 18.0, "eval_accuracy": 0.9472104078058544, "eval_f1": 0.7611548556430445, "eval_loss": 0.16536261141300201, "eval_precision": 0.6855791962174941, "eval_recall": 0.855457227138643, "eval_runtime": 0.9101, "eval_samples_per_second": 205.482, "eval_steps_per_second": 3.297, "step": 1908 }, { "epoch": 19.0, "grad_norm": 2.786510705947876, "learning_rate": 2.5e-06, "loss": 0.1726, "step": 2014 }, { "epoch": 19.0, "eval_accuracy": 0.9477107830873155, "eval_f1": 0.7621550591327202, "eval_loss": 0.16280074417591095, "eval_precision": 0.6872037914691943, "eval_recall": 0.855457227138643, "eval_runtime": 0.9163, "eval_samples_per_second": 204.074, "eval_steps_per_second": 3.274, "step": 2014 }, { "epoch": 20.0, "grad_norm": 1.0649765729904175, "learning_rate": 0.0, "loss": 0.1684, "step": 2120 }, { "epoch": 20.0, "eval_accuracy": 0.9472104078058544, "eval_f1": 0.7595269382391592, "eval_loss": 0.16256052255630493, "eval_precision": 0.6848341232227488, "eval_recall": 0.8525073746312685, "eval_runtime": 0.9134, "eval_samples_per_second": 204.735, "eval_steps_per_second": 3.285, "step": 2120 }, { "epoch": 20.0, "step": 2120, "total_flos": 904262544144960.0, "train_loss": 0.3163445589677343, "train_runtime": 244.3626, "train_samples_per_second": 138.074, "train_steps_per_second": 8.676 } ], "logging_steps": 500, "max_steps": 2120, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 904262544144960.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }