{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 10.745368957519531, "learning_rate": 4.75e-05, "loss": 0.3971, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.851257564821498, "eval_loss": 0.27043774724006653, "eval_precision": 0.8714882943143812, "eval_recall": 0.8366521185670122, "eval_runtime": 4.9491, "eval_samples_per_second": 80.621, "eval_steps_per_second": 10.103, "step": 122 }, { "epoch": 2.0, "grad_norm": 7.182548999786377, "learning_rate": 4.5e-05, "loss": 0.214, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.9197994987468672, "eval_f1": 0.9032551372976905, "eval_loss": 0.23814059793949127, "eval_precision": 0.9032551372976905, "eval_recall": 0.9032551372976905, "eval_runtime": 4.9784, "eval_samples_per_second": 80.146, "eval_steps_per_second": 10.043, "step": 244 }, { "epoch": 3.0, "grad_norm": 93.81620025634766, "learning_rate": 4.25e-05, "loss": 0.1301, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8439374185136896, "eval_loss": 0.38320210576057434, "eval_precision": 0.8794955044955045, "eval_recall": 0.822376795781051, "eval_runtime": 5.0811, "eval_samples_per_second": 78.526, "eval_steps_per_second": 9.84, "step": 366 }, { "epoch": 4.0, "grad_norm": 0.22968685626983643, "learning_rate": 4e-05, "loss": 0.0904, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8782051282051282, "eval_loss": 0.3873072564601898, "eval_precision": 0.8661616161616161, "eval_recall": 0.8955264593562466, "eval_runtime": 4.9496, "eval_samples_per_second": 80.613, "eval_steps_per_second": 10.102, "step": 488 }, { "epoch": 5.0, "grad_norm": 0.057639382779598236, "learning_rate": 3.7500000000000003e-05, "loss": 0.0587, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.9147869674185464, "eval_f1": 0.8956140350877193, "eval_loss": 0.4033690392971039, "eval_precision": 0.9022147147147147, "eval_recall": 0.8897072194944535, "eval_runtime": 4.9468, "eval_samples_per_second": 80.659, "eval_steps_per_second": 10.108, "step": 610 }, { "epoch": 6.0, "grad_norm": 0.12450123578310013, "learning_rate": 3.5e-05, "loss": 0.0496, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8755702215614461, "eval_loss": 0.5248555541038513, "eval_precision": 0.8632157235517781, "eval_recall": 0.8937534097108566, "eval_runtime": 4.9627, "eval_samples_per_second": 80.399, "eval_steps_per_second": 10.075, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.029339170083403587, "learning_rate": 3.2500000000000004e-05, "loss": 0.0362, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8842272010790718, "eval_loss": 0.533008873462677, "eval_precision": 0.8999348109517601, "eval_recall": 0.8718857974177123, "eval_runtime": 4.956, "eval_samples_per_second": 80.508, "eval_steps_per_second": 10.089, "step": 854 }, { "epoch": 8.0, "grad_norm": 0.012251710519194603, "learning_rate": 3e-05, "loss": 0.0223, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8769602202215754, "eval_loss": 0.6168325543403625, "eval_precision": 0.8742831541218639, "eval_recall": 0.8797963266048372, "eval_runtime": 4.9435, "eval_samples_per_second": 80.712, "eval_steps_per_second": 10.114, "step": 976 }, { "epoch": 9.0, "grad_norm": 0.027291299775242805, "learning_rate": 2.7500000000000004e-05, "loss": 0.0191, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8674628282189181, "eval_loss": 0.7103565335273743, "eval_precision": 0.8869858462356303, "eval_recall": 0.8530187306783051, "eval_runtime": 4.9624, "eval_samples_per_second": 80.405, "eval_steps_per_second": 10.076, "step": 1098 }, { "epoch": 10.0, "grad_norm": 0.26321402192115784, "learning_rate": 2.5e-05, "loss": 0.0147, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8775533117267087, "eval_loss": 0.6125138998031616, "eval_precision": 0.873246730188791, "eval_recall": 0.8822967812329514, "eval_runtime": 4.95, "eval_samples_per_second": 80.606, "eval_steps_per_second": 10.101, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.00528654595836997, "learning_rate": 2.25e-05, "loss": 0.0121, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.888343937787204, "eval_loss": 0.6882844567298889, "eval_precision": 0.8790267011197244, "eval_recall": 0.9001182033096926, "eval_runtime": 4.9822, "eval_samples_per_second": 80.085, "eval_steps_per_second": 10.036, "step": 1342 }, { "epoch": 12.0, "grad_norm": 0.007998102344572544, "learning_rate": 2e-05, "loss": 0.0118, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8781334505389722, "eval_loss": 0.6760030388832092, "eval_precision": 0.872316715542522, "eval_recall": 0.8847972358610656, "eval_runtime": 4.9668, "eval_samples_per_second": 80.334, "eval_steps_per_second": 10.067, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.002160990610718727, "learning_rate": 1.75e-05, "loss": 0.0034, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8851154755410074, "eval_loss": 0.7163070440292358, "eval_precision": 0.8851154755410074, "eval_recall": 0.8851154755410074, "eval_runtime": 4.9658, "eval_samples_per_second": 80.349, "eval_steps_per_second": 10.069, "step": 1586 }, { "epoch": 14.0, "grad_norm": 0.09897086024284363, "learning_rate": 1.5e-05, "loss": 0.0064, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8884169154604891, "eval_loss": 0.7180221080780029, "eval_precision": 0.8874630556728391, "eval_recall": 0.8893889798145117, "eval_runtime": 4.9417, "eval_samples_per_second": 80.741, "eval_steps_per_second": 10.118, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.004853605292737484, "learning_rate": 1.25e-05, "loss": 0.008, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8807643913180245, "eval_loss": 0.6914934515953064, "eval_precision": 0.8978286538966151, "eval_recall": 0.8676122931442081, "eval_runtime": 4.9615, "eval_samples_per_second": 80.42, "eval_steps_per_second": 10.078, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.0016086915275081992, "learning_rate": 1e-05, "loss": 0.0052, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8884169154604891, "eval_loss": 0.6778000593185425, "eval_precision": 0.8874630556728391, "eval_recall": 0.8893889798145117, "eval_runtime": 4.9627, "eval_samples_per_second": 80.4, "eval_steps_per_second": 10.075, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.0021244632080197334, "learning_rate": 7.5e-06, "loss": 0.0066, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8884169154604891, "eval_loss": 0.6992811560630798, "eval_precision": 0.8874630556728391, "eval_recall": 0.8893889798145117, "eval_runtime": 4.9649, "eval_samples_per_second": 80.364, "eval_steps_per_second": 10.071, "step": 2074 }, { "epoch": 18.0, "grad_norm": 0.0017945035360753536, "learning_rate": 5e-06, "loss": 0.0053, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8814283033033032, "eval_loss": 0.6966382265090942, "eval_precision": 0.895578231292517, "eval_recall": 0.8701127477723223, "eval_runtime": 4.9631, "eval_samples_per_second": 80.393, "eval_steps_per_second": 10.074, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.0012101498432457447, "learning_rate": 2.5e-06, "loss": 0.0022, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8765393898137261, "eval_loss": 0.7112175822257996, "eval_precision": 0.8852261942423283, "eval_recall": 0.8690671031096563, "eval_runtime": 4.9613, "eval_samples_per_second": 80.422, "eval_steps_per_second": 10.078, "step": 2318 }, { "epoch": 20.0, "grad_norm": 0.009784560650587082, "learning_rate": 0.0, "loss": 0.0042, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8793019197207679, "eval_loss": 0.7049560546875, "eval_precision": 0.8893184421534936, "eval_recall": 0.8708401527550463, "eval_runtime": 4.9243, "eval_samples_per_second": 81.027, "eval_steps_per_second": 10.154, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7584162436176000.0, "train_loss": 0.05485985055565834, "train_runtime": 2706.9642, "train_samples_per_second": 26.879, "train_steps_per_second": 0.901 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7584162436176000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }