{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.0894904136657715, "learning_rate": 4.75e-05, "loss": 0.541, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7293233082706767, "eval_f1": 0.6396989966555184, "eval_loss": 0.4985284209251404, "eval_precision": 0.664766661583041, "eval_recall": 0.630978359701764, "eval_runtime": 5.2157, "eval_samples_per_second": 76.5, "eval_steps_per_second": 9.586, "step": 122 }, { "epoch": 2.0, "grad_norm": 5.95181941986084, "learning_rate": 4.5e-05, "loss": 0.4477, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7644110275689223, "eval_f1": 0.7461557203963398, "eval_loss": 0.44652456045150757, "eval_precision": 0.7426785714285714, "eval_recall": 0.7883251500272777, "eval_runtime": 5.1119, "eval_samples_per_second": 78.053, "eval_steps_per_second": 9.781, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.869917154312134, "learning_rate": 4.25e-05, "loss": 0.347, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8255102040816327, "eval_loss": 0.323697566986084, "eval_precision": 0.8556293485135991, "eval_recall": 0.8067375886524822, "eval_runtime": 5.1398, "eval_samples_per_second": 77.63, "eval_steps_per_second": 9.728, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.4031054973602295, "learning_rate": 4e-05, "loss": 0.3005, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8683279483657071, "eval_loss": 0.2991793751716614, "eval_precision": 0.873366724738676, "eval_recall": 0.863747954173486, "eval_runtime": 5.1616, "eval_samples_per_second": 77.302, "eval_steps_per_second": 9.687, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.774945020675659, "learning_rate": 3.7500000000000003e-05, "loss": 0.281, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8342105263157895, "eval_loss": 0.2868594527244568, "eval_precision": 0.8398085585585586, "eval_recall": 0.82924168030551, "eval_runtime": 5.1483, "eval_samples_per_second": 77.502, "eval_steps_per_second": 9.712, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.3688924312591553, "learning_rate": 3.5e-05, "loss": 0.2419, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8537390029325513, "eval_loss": 0.29453349113464355, "eval_precision": 0.8442805058676086, "eval_recall": 0.8663393344244408, "eval_runtime": 5.0969, "eval_samples_per_second": 78.284, "eval_steps_per_second": 9.81, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.5997357964515686, "learning_rate": 3.2500000000000004e-05, "loss": 0.2394, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8529524583135901, "eval_loss": 0.2835337221622467, "eval_precision": 0.8504480286738352, "eval_recall": 0.8556101109292599, "eval_runtime": 5.0928, "eval_samples_per_second": 78.346, "eval_steps_per_second": 9.818, "step": 854 }, { "epoch": 8.0, "grad_norm": 2.8720760345458984, "learning_rate": 3e-05, "loss": 0.2192, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.850729517396184, "eval_loss": 0.28028008341789246, "eval_precision": 0.8535087719298247, "eval_recall": 0.8481087470449173, "eval_runtime": 5.1964, "eval_samples_per_second": 76.784, "eval_steps_per_second": 9.622, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.954423904418945, "learning_rate": 2.7500000000000004e-05, "loss": 0.2144, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8480717680029244, "eval_loss": 0.28611448407173157, "eval_precision": 0.8498775260257195, "eval_recall": 0.8463356973995272, "eval_runtime": 5.1448, "eval_samples_per_second": 77.554, "eval_steps_per_second": 9.719, "step": 1098 }, { "epoch": 10.0, "grad_norm": 4.511594772338867, "learning_rate": 2.5e-05, "loss": 0.2056, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8696722245432793, "eval_loss": 0.27244648337364197, "eval_precision": 0.8706135006701596, "eval_recall": 0.8687488634297145, "eval_runtime": 5.104, "eval_samples_per_second": 78.174, "eval_steps_per_second": 9.796, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.713789463043213, "learning_rate": 2.25e-05, "loss": 0.1822, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8662440310793597, "eval_loss": 0.28133633732795715, "eval_precision": 0.8606158357771261, "eval_recall": 0.872704128023277, "eval_runtime": 5.1442, "eval_samples_per_second": 77.563, "eval_steps_per_second": 9.72, "step": 1342 }, { "epoch": 12.0, "grad_norm": 6.766155242919922, "learning_rate": 2e-05, "loss": 0.1817, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8584001703456596, "eval_loss": 0.2900215685367584, "eval_precision": 0.8759655377302435, "eval_recall": 0.8451991271140207, "eval_runtime": 5.1374, "eval_samples_per_second": 77.665, "eval_steps_per_second": 9.733, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.9034644365310669, "learning_rate": 1.75e-05, "loss": 0.1621, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8710526315789473, "eval_loss": 0.29263192415237427, "eval_precision": 0.8772522522522522, "eval_recall": 0.8655210038188761, "eval_runtime": 5.1149, "eval_samples_per_second": 78.008, "eval_steps_per_second": 9.775, "step": 1586 }, { "epoch": 14.0, "grad_norm": 6.05497932434082, "learning_rate": 1.5e-05, "loss": 0.1577, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8709582797445791, "eval_loss": 0.29044803977012634, "eval_precision": 0.8683243727598566, "eval_recall": 0.8737497726859429, "eval_runtime": 5.096, "eval_samples_per_second": 78.297, "eval_steps_per_second": 9.812, "step": 1708 }, { "epoch": 15.0, "grad_norm": 3.013443946838379, "learning_rate": 1.25e-05, "loss": 0.1612, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8587719298245614, "eval_loss": 0.2996305227279663, "eval_precision": 0.864771021021021, "eval_recall": 0.8534278959810875, "eval_runtime": 5.1189, "eval_samples_per_second": 77.946, "eval_steps_per_second": 9.768, "step": 1830 }, { "epoch": 16.0, "grad_norm": 1.775810718536377, "learning_rate": 1e-05, "loss": 0.1496, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8649563392675828, "eval_loss": 0.29704856872558594, "eval_precision": 0.8623655913978494, "eval_recall": 0.8677032187670486, "eval_runtime": 5.2102, "eval_samples_per_second": 76.58, "eval_steps_per_second": 9.596, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.5526378154754639, "learning_rate": 7.5e-06, "loss": 0.149, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.858259325044405, "eval_loss": 0.29482966661453247, "eval_precision": 0.8573798178418481, "eval_recall": 0.8591562102200401, "eval_runtime": 5.1218, "eval_samples_per_second": 77.902, "eval_steps_per_second": 9.762, "step": 2074 }, { "epoch": 18.0, "grad_norm": 4.444484233856201, "learning_rate": 5e-06, "loss": 0.1424, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8609292598654301, "eval_loss": 0.29769569635391235, "eval_precision": 0.8609292598654301, "eval_recall": 0.8609292598654301, "eval_runtime": 5.0934, "eval_samples_per_second": 78.336, "eval_steps_per_second": 9.817, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.0727450847625732, "learning_rate": 2.5e-06, "loss": 0.1383, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8602260265626904, "eval_loss": 0.2990491986274719, "eval_precision": 0.8620943049601959, "eval_recall": 0.8584288052373159, "eval_runtime": 5.1176, "eval_samples_per_second": 77.966, "eval_steps_per_second": 9.77, "step": 2318 }, { "epoch": 20.0, "grad_norm": 4.4248151779174805, "learning_rate": 0.0, "loss": 0.1407, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.858259325044405, "eval_loss": 0.2988053262233734, "eval_precision": 0.8573798178418481, "eval_recall": 0.8591562102200401, "eval_runtime": 5.0823, "eval_samples_per_second": 78.508, "eval_steps_per_second": 9.838, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.2301350734272941, "train_runtime": 1951.0131, "train_samples_per_second": 37.293, "train_steps_per_second": 1.251 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }