|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.279876708984375, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5457, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7192982456140351, |
|
"eval_f1": 0.6013986013986015, |
|
"eval_loss": 0.4752861261367798, |
|
"eval_precision": 0.6464762782128051, |
|
"eval_recall": 0.5963811602109474, |
|
"eval_runtime": 5.2249, |
|
"eval_samples_per_second": 76.365, |
|
"eval_steps_per_second": 9.57, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.457512855529785, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4518, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7969924812030075, |
|
"eval_f1": 0.7684901970616256, |
|
"eval_loss": 0.4070126414299011, |
|
"eval_precision": 0.7589055735189347, |
|
"eval_recall": 0.7863702491362066, |
|
"eval_runtime": 5.1546, |
|
"eval_samples_per_second": 77.407, |
|
"eval_steps_per_second": 9.7, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.400643825531006, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3461, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.7970482088214634, |
|
"eval_loss": 0.3412257730960846, |
|
"eval_precision": 0.8231252179979072, |
|
"eval_recall": 0.7807783233315149, |
|
"eval_runtime": 5.1898, |
|
"eval_samples_per_second": 76.882, |
|
"eval_steps_per_second": 9.634, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.3028647899627686, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2958, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8228567054500919, |
|
"eval_loss": 0.3252820372581482, |
|
"eval_precision": 0.8263351692555232, |
|
"eval_recall": 0.8196490270958356, |
|
"eval_runtime": 5.1759, |
|
"eval_samples_per_second": 77.088, |
|
"eval_steps_per_second": 9.66, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.7779637575149536, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2659, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8560793854229822, |
|
"eval_loss": 0.2941116690635681, |
|
"eval_precision": 0.8609538327526132, |
|
"eval_recall": 0.8516548463356974, |
|
"eval_runtime": 5.1453, |
|
"eval_samples_per_second": 77.546, |
|
"eval_steps_per_second": 9.718, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.210702896118164, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2482, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8563451067988272, |
|
"eval_loss": 0.2965351343154907, |
|
"eval_precision": 0.8473119816985988, |
|
"eval_recall": 0.8681123840698308, |
|
"eval_runtime": 5.1607, |
|
"eval_samples_per_second": 77.315, |
|
"eval_steps_per_second": 9.689, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.5390880703926086, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2264, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8530841286673736, |
|
"eval_loss": 0.286903977394104, |
|
"eval_precision": 0.8446597760551249, |
|
"eval_recall": 0.8638388797963266, |
|
"eval_runtime": 5.4309, |
|
"eval_samples_per_second": 73.468, |
|
"eval_steps_per_second": 9.207, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.480511426925659, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2218, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8730431462390225, |
|
"eval_loss": 0.2795129120349884, |
|
"eval_precision": 0.8961111111111111, |
|
"eval_recall": 0.8565648299690853, |
|
"eval_runtime": 5.1715, |
|
"eval_samples_per_second": 77.153, |
|
"eval_steps_per_second": 9.668, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.716876029968262, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2106, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8715803025426456, |
|
"eval_loss": 0.27050527930259705, |
|
"eval_precision": 0.8673433153814287, |
|
"eval_recall": 0.8762502273140571, |
|
"eval_runtime": 5.1606, |
|
"eval_samples_per_second": 77.317, |
|
"eval_steps_per_second": 9.689, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.200309991836548, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1981, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8867007927797945, |
|
"eval_loss": 0.275096595287323, |
|
"eval_precision": 0.89198606271777, |
|
"eval_recall": 0.8818876159301692, |
|
"eval_runtime": 5.1671, |
|
"eval_samples_per_second": 77.22, |
|
"eval_steps_per_second": 9.677, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.14013409614563, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1802, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8862394957983193, |
|
"eval_loss": 0.27452006936073303, |
|
"eval_precision": 0.8826476009275225, |
|
"eval_recall": 0.8901163847972358, |
|
"eval_runtime": 5.1559, |
|
"eval_samples_per_second": 77.387, |
|
"eval_steps_per_second": 9.698, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 6.242837905883789, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1828, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8854915648632926, |
|
"eval_loss": 0.279909610748291, |
|
"eval_precision": 0.8956662848415425, |
|
"eval_recall": 0.8768867066739408, |
|
"eval_runtime": 5.1353, |
|
"eval_samples_per_second": 77.698, |
|
"eval_steps_per_second": 9.737, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.8258763551712036, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1707, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9097744360902256, |
|
"eval_f1": 0.8894736842105263, |
|
"eval_loss": 0.27386558055877686, |
|
"eval_precision": 0.8959740990990992, |
|
"eval_recall": 0.8836606655755592, |
|
"eval_runtime": 5.1775, |
|
"eval_samples_per_second": 77.065, |
|
"eval_steps_per_second": 9.657, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 3.6909942626953125, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1606, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.888964101175568, |
|
"eval_loss": 0.28680744767189026, |
|
"eval_precision": 0.8862007168458781, |
|
"eval_recall": 0.8918894344426259, |
|
"eval_runtime": 5.1734, |
|
"eval_samples_per_second": 77.126, |
|
"eval_steps_per_second": 9.665, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.49116143584251404, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1499, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8817957385392532, |
|
"eval_loss": 0.2929786145687103, |
|
"eval_precision": 0.8827677592299257, |
|
"eval_recall": 0.8808419712675032, |
|
"eval_runtime": 5.1562, |
|
"eval_samples_per_second": 77.383, |
|
"eval_steps_per_second": 9.697, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.3892961740493774, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1555, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8760282890453928, |
|
"eval_loss": 0.3041006028652191, |
|
"eval_precision": 0.8682260305697083, |
|
"eval_recall": 0.8855246408437898, |
|
"eval_runtime": 5.1466, |
|
"eval_samples_per_second": 77.527, |
|
"eval_steps_per_second": 9.715, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.9728598594665527, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1396, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8823853973772722, |
|
"eval_loss": 0.28764259815216064, |
|
"eval_precision": 0.8814464081066409, |
|
"eval_recall": 0.8833424258956174, |
|
"eval_runtime": 5.1398, |
|
"eval_samples_per_second": 77.63, |
|
"eval_steps_per_second": 9.728, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.8131844997406006, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1477, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8845345436822225, |
|
"eval_loss": 0.28996890783309937, |
|
"eval_precision": 0.8865278628291489, |
|
"eval_recall": 0.8826150209128933, |
|
"eval_runtime": 5.3979, |
|
"eval_samples_per_second": 73.917, |
|
"eval_steps_per_second": 9.263, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.289058208465576, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1434, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8851154755410074, |
|
"eval_loss": 0.2917177975177765, |
|
"eval_precision": 0.8851154755410074, |
|
"eval_recall": 0.8851154755410074, |
|
"eval_runtime": 5.1418, |
|
"eval_samples_per_second": 77.6, |
|
"eval_steps_per_second": 9.724, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.46004581451416, |
|
"learning_rate": 0.0, |
|
"loss": 0.1386, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8851154755410074, |
|
"eval_loss": 0.2913039028644562, |
|
"eval_precision": 0.8851154755410074, |
|
"eval_recall": 0.8851154755410074, |
|
"eval_runtime": 5.1424, |
|
"eval_samples_per_second": 77.591, |
|
"eval_steps_per_second": 9.723, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.22896970373685244, |
|
"train_runtime": 1968.7629, |
|
"train_samples_per_second": 36.957, |
|
"train_steps_per_second": 1.239 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|