{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.279876708984375, "learning_rate": 4.75e-05, "loss": 0.5457, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7192982456140351, "eval_f1": 0.6013986013986015, "eval_loss": 0.4752861261367798, "eval_precision": 0.6464762782128051, "eval_recall": 0.5963811602109474, "eval_runtime": 5.2249, "eval_samples_per_second": 76.365, "eval_steps_per_second": 9.57, "step": 122 }, { "epoch": 2.0, "grad_norm": 5.457512855529785, "learning_rate": 4.5e-05, "loss": 0.4518, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7969924812030075, "eval_f1": 0.7684901970616256, "eval_loss": 0.4070126414299011, "eval_precision": 0.7589055735189347, "eval_recall": 0.7863702491362066, "eval_runtime": 5.1546, "eval_samples_per_second": 77.407, "eval_steps_per_second": 9.7, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.400643825531006, "learning_rate": 4.25e-05, "loss": 0.3461, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8421052631578947, "eval_f1": 0.7970482088214634, "eval_loss": 0.3412257730960846, "eval_precision": 0.8231252179979072, "eval_recall": 0.7807783233315149, "eval_runtime": 5.1898, "eval_samples_per_second": 76.882, "eval_steps_per_second": 9.634, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.3028647899627686, "learning_rate": 4e-05, "loss": 0.2958, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8228567054500919, "eval_loss": 0.3252820372581482, "eval_precision": 0.8263351692555232, "eval_recall": 0.8196490270958356, "eval_runtime": 5.1759, "eval_samples_per_second": 77.088, "eval_steps_per_second": 9.66, "step": 488 }, { "epoch": 5.0, "grad_norm": 0.7779637575149536, "learning_rate": 3.7500000000000003e-05, "loss": 0.2659, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8560793854229822, "eval_loss": 0.2941116690635681, "eval_precision": 0.8609538327526132, "eval_recall": 0.8516548463356974, "eval_runtime": 5.1453, "eval_samples_per_second": 77.546, "eval_steps_per_second": 9.718, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.210702896118164, "learning_rate": 3.5e-05, "loss": 0.2482, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8563451067988272, "eval_loss": 0.2965351343154907, "eval_precision": 0.8473119816985988, "eval_recall": 0.8681123840698308, "eval_runtime": 5.1607, "eval_samples_per_second": 77.315, "eval_steps_per_second": 9.689, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.5390880703926086, "learning_rate": 3.2500000000000004e-05, "loss": 0.2264, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8530841286673736, "eval_loss": 0.286903977394104, "eval_precision": 0.8446597760551249, "eval_recall": 0.8638388797963266, "eval_runtime": 5.4309, "eval_samples_per_second": 73.468, "eval_steps_per_second": 9.207, "step": 854 }, { "epoch": 8.0, "grad_norm": 2.480511426925659, "learning_rate": 3e-05, "loss": 0.2218, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8730431462390225, "eval_loss": 0.2795129120349884, "eval_precision": 0.8961111111111111, "eval_recall": 0.8565648299690853, "eval_runtime": 5.1715, "eval_samples_per_second": 77.153, "eval_steps_per_second": 9.668, "step": 976 }, { "epoch": 9.0, "grad_norm": 6.716876029968262, "learning_rate": 2.7500000000000004e-05, "loss": 0.2106, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8715803025426456, "eval_loss": 0.27050527930259705, "eval_precision": 0.8673433153814287, "eval_recall": 0.8762502273140571, "eval_runtime": 5.1606, "eval_samples_per_second": 77.317, "eval_steps_per_second": 9.689, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.200309991836548, "learning_rate": 2.5e-05, "loss": 0.1981, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8867007927797945, "eval_loss": 0.275096595287323, "eval_precision": 0.89198606271777, "eval_recall": 0.8818876159301692, "eval_runtime": 5.1671, "eval_samples_per_second": 77.22, "eval_steps_per_second": 9.677, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.14013409614563, "learning_rate": 2.25e-05, "loss": 0.1802, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8862394957983193, "eval_loss": 0.27452006936073303, "eval_precision": 0.8826476009275225, "eval_recall": 0.8901163847972358, "eval_runtime": 5.1559, "eval_samples_per_second": 77.387, "eval_steps_per_second": 9.698, "step": 1342 }, { "epoch": 12.0, "grad_norm": 6.242837905883789, "learning_rate": 2e-05, "loss": 0.1828, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8854915648632926, "eval_loss": 0.279909610748291, "eval_precision": 0.8956662848415425, "eval_recall": 0.8768867066739408, "eval_runtime": 5.1353, "eval_samples_per_second": 77.698, "eval_steps_per_second": 9.737, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.8258763551712036, "learning_rate": 1.75e-05, "loss": 0.1707, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.9097744360902256, "eval_f1": 0.8894736842105263, "eval_loss": 0.27386558055877686, "eval_precision": 0.8959740990990992, "eval_recall": 0.8836606655755592, "eval_runtime": 5.1775, "eval_samples_per_second": 77.065, "eval_steps_per_second": 9.657, "step": 1586 }, { "epoch": 14.0, "grad_norm": 3.6909942626953125, "learning_rate": 1.5e-05, "loss": 0.1606, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.888964101175568, "eval_loss": 0.28680744767189026, "eval_precision": 0.8862007168458781, "eval_recall": 0.8918894344426259, "eval_runtime": 5.1734, "eval_samples_per_second": 77.126, "eval_steps_per_second": 9.665, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.49116143584251404, "learning_rate": 1.25e-05, "loss": 0.1499, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8817957385392532, "eval_loss": 0.2929786145687103, "eval_precision": 0.8827677592299257, "eval_recall": 0.8808419712675032, "eval_runtime": 5.1562, "eval_samples_per_second": 77.383, "eval_steps_per_second": 9.697, "step": 1830 }, { "epoch": 16.0, "grad_norm": 1.3892961740493774, "learning_rate": 1e-05, "loss": 0.1555, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8760282890453928, "eval_loss": 0.3041006028652191, "eval_precision": 0.8682260305697083, "eval_recall": 0.8855246408437898, "eval_runtime": 5.1466, "eval_samples_per_second": 77.527, "eval_steps_per_second": 9.715, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.9728598594665527, "learning_rate": 7.5e-06, "loss": 0.1396, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8823853973772722, "eval_loss": 0.28764259815216064, "eval_precision": 0.8814464081066409, "eval_recall": 0.8833424258956174, "eval_runtime": 5.1398, "eval_samples_per_second": 77.63, "eval_steps_per_second": 9.728, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.8131844997406006, "learning_rate": 5e-06, "loss": 0.1477, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8845345436822225, "eval_loss": 0.28996890783309937, "eval_precision": 0.8865278628291489, "eval_recall": 0.8826150209128933, "eval_runtime": 5.3979, "eval_samples_per_second": 73.917, "eval_steps_per_second": 9.263, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.289058208465576, "learning_rate": 2.5e-06, "loss": 0.1434, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8851154755410074, "eval_loss": 0.2917177975177765, "eval_precision": 0.8851154755410074, "eval_recall": 0.8851154755410074, "eval_runtime": 5.1418, "eval_samples_per_second": 77.6, "eval_steps_per_second": 9.724, "step": 2318 }, { "epoch": 20.0, "grad_norm": 2.46004581451416, "learning_rate": 0.0, "loss": 0.1386, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8851154755410074, "eval_loss": 0.2913039028644562, "eval_precision": 0.8851154755410074, "eval_recall": 0.8851154755410074, "eval_runtime": 5.1424, "eval_samples_per_second": 77.591, "eval_steps_per_second": 9.723, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.22896970373685244, "train_runtime": 1968.7629, "train_samples_per_second": 36.957, "train_steps_per_second": 1.239 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }