{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.532687187194824, "learning_rate": 4.75e-05, "loss": 0.551, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7243107769423559, "eval_f1": 0.6144317942230656, "eval_loss": 0.5008909106254578, "eval_precision": 0.65566534914361, "eval_recall": 0.6074286233860702, "eval_runtime": 1.7719, "eval_samples_per_second": 225.176, "eval_steps_per_second": 28.218, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.3572421073913574, "learning_rate": 4.5e-05, "loss": 0.4528, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7819548872180451, "eval_f1": 0.7578021978021978, "eval_loss": 0.4117695391178131, "eval_precision": 0.7486942070275404, "eval_recall": 0.7857337697763229, "eval_runtime": 1.781, "eval_samples_per_second": 224.038, "eval_steps_per_second": 28.075, "step": 244 }, { "epoch": 3.0, "grad_norm": 2.5434460639953613, "learning_rate": 4.25e-05, "loss": 0.3588, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8063505120717311, "eval_loss": 0.3427870571613312, "eval_precision": 0.844033177433499, "eval_recall": 0.7853700672849608, "eval_runtime": 1.7815, "eval_samples_per_second": 223.975, "eval_steps_per_second": 28.067, "step": 366 }, { "epoch": 4.0, "grad_norm": 4.183424949645996, "learning_rate": 4e-05, "loss": 0.3192, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8246499363520641, "eval_loss": 0.31169387698173523, "eval_precision": 0.8246499363520641, "eval_recall": 0.8246499363520641, "eval_runtime": 1.8089, "eval_samples_per_second": 220.578, "eval_steps_per_second": 27.641, "step": 488 }, { "epoch": 5.0, "grad_norm": 1.7957929372787476, "learning_rate": 3.7500000000000003e-05, "loss": 0.2714, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8446181767415888, "eval_loss": 0.3037036657333374, "eval_precision": 0.835902201887332, "eval_recall": 0.8560192762320422, "eval_runtime": 1.8052, "eval_samples_per_second": 221.025, "eval_steps_per_second": 27.697, "step": 610 }, { "epoch": 6.0, "grad_norm": 3.427933692932129, "learning_rate": 3.5e-05, "loss": 0.257, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8475258334958082, "eval_loss": 0.2832619249820709, "eval_precision": 0.8591828192414193, "eval_recall": 0.8381069285324605, "eval_runtime": 1.8047, "eval_samples_per_second": 221.086, "eval_steps_per_second": 27.705, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.5598583221435547, "learning_rate": 3.2500000000000004e-05, "loss": 0.2405, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8616171059774413, "eval_loss": 0.28606978058815, "eval_precision": 0.859873949579832, "eval_recall": 0.8634297144935443, "eval_runtime": 1.8066, "eval_samples_per_second": 220.859, "eval_steps_per_second": 27.677, "step": 854 }, { "epoch": 8.0, "grad_norm": 1.1964571475982666, "learning_rate": 3e-05, "loss": 0.2163, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8556004584112431, "eval_loss": 0.295376181602478, "eval_precision": 0.8538865546218487, "eval_recall": 0.85738316057465, "eval_runtime": 1.8106, "eval_samples_per_second": 220.364, "eval_steps_per_second": 27.614, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.184772491455078, "learning_rate": 2.7500000000000004e-05, "loss": 0.2135, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8472902633190447, "eval_loss": 0.29421547055244446, "eval_precision": 0.8510272912927781, "eval_recall": 0.8438352427714131, "eval_runtime": 1.8081, "eval_samples_per_second": 220.677, "eval_steps_per_second": 27.654, "step": 1098 }, { "epoch": 10.0, "grad_norm": 4.817326545715332, "learning_rate": 2.5e-05, "loss": 0.2001, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8537492688633261, "eval_loss": 0.3002479076385498, "eval_precision": 0.8656062850151329, "eval_recall": 0.8441534824513548, "eval_runtime": 1.8147, "eval_samples_per_second": 219.877, "eval_steps_per_second": 27.553, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.364151954650879, "learning_rate": 2.25e-05, "loss": 0.1825, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8676331036823873, "eval_loss": 0.30105578899383545, "eval_precision": 0.8749292230261088, "eval_recall": 0.8612474995453718, "eval_runtime": 1.7869, "eval_samples_per_second": 223.295, "eval_steps_per_second": 27.982, "step": 1342 }, { "epoch": 12.0, "grad_norm": 4.355996608734131, "learning_rate": 2e-05, "loss": 0.1765, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8656154317207594, "eval_loss": 0.28576698899269104, "eval_precision": 0.869546382820719, "eval_recall": 0.861974904528096, "eval_runtime": 1.7863, "eval_samples_per_second": 223.361, "eval_steps_per_second": 27.99, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.647818922996521, "learning_rate": 1.75e-05, "loss": 0.1674, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8748655913978494, "eval_loss": 0.29319024085998535, "eval_precision": 0.86983032873807, "eval_recall": 0.8805237315875614, "eval_runtime": 1.7923, "eval_samples_per_second": 222.613, "eval_steps_per_second": 27.896, "step": 1586 }, { "epoch": 14.0, "grad_norm": 2.9686295986175537, "learning_rate": 1.5e-05, "loss": 0.1597, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8668668668668669, "eval_loss": 0.2937151789665222, "eval_precision": 0.8598901098901099, "eval_recall": 0.8752045826513912, "eval_runtime": 1.8072, "eval_samples_per_second": 220.782, "eval_steps_per_second": 27.667, "step": 1708 }, { "epoch": 15.0, "grad_norm": 2.176563262939453, "learning_rate": 1.25e-05, "loss": 0.1564, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8717238211879976, "eval_loss": 0.2963174879550934, "eval_precision": 0.8757194133300328, "eval_recall": 0.8680214584469903, "eval_runtime": 1.8185, "eval_samples_per_second": 219.414, "eval_steps_per_second": 27.495, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.7651334404945374, "learning_rate": 1e-05, "loss": 0.142, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8683279483657071, "eval_loss": 0.302460253238678, "eval_precision": 0.873366724738676, "eval_recall": 0.863747954173486, "eval_runtime": 1.8132, "eval_samples_per_second": 220.059, "eval_steps_per_second": 27.576, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.19514738023281097, "learning_rate": 7.5e-06, "loss": 0.143, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8682773109243698, "eval_loss": 0.2950632870197296, "eval_precision": 0.864855223259409, "eval_recall": 0.8719767230405528, "eval_runtime": 1.8087, "eval_samples_per_second": 220.597, "eval_steps_per_second": 27.644, "step": 2074 }, { "epoch": 18.0, "grad_norm": 0.15066123008728027, "learning_rate": 5e-06, "loss": 0.1315, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.858259325044405, "eval_loss": 0.3012860417366028, "eval_precision": 0.8573798178418481, "eval_recall": 0.8591562102200401, "eval_runtime": 1.8058, "eval_samples_per_second": 220.949, "eval_steps_per_second": 27.688, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.04642534255981445, "learning_rate": 2.5e-06, "loss": 0.1378, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8629148629148629, "eval_loss": 0.30377450585365295, "eval_precision": 0.8657894736842104, "eval_recall": 0.860201854882706, "eval_runtime": 1.8121, "eval_samples_per_second": 220.187, "eval_steps_per_second": 27.592, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.5789785385131836, "learning_rate": 0.0, "loss": 0.1333, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8609292598654301, "eval_loss": 0.29736196994781494, "eval_precision": 0.8609292598654301, "eval_recall": 0.8609292598654301, "eval_runtime": 1.814, "eval_samples_per_second": 219.959, "eval_steps_per_second": 27.564, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.23053875125822473, "train_runtime": 628.042, "train_samples_per_second": 115.852, "train_steps_per_second": 3.885 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }