|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.803397178649902, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5411, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7368421052631579, |
|
"eval_f1": 0.6508662716567915, |
|
"eval_loss": 0.49393221735954285, |
|
"eval_precision": 0.6761904761904762, |
|
"eval_recall": 0.6412984178941625, |
|
"eval_runtime": 1.7881, |
|
"eval_samples_per_second": 223.142, |
|
"eval_steps_per_second": 27.963, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.498361587524414, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4231, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8245614035087719, |
|
"eval_f1": 0.7995262704565029, |
|
"eval_loss": 0.3851858377456665, |
|
"eval_precision": 0.7887596899224806, |
|
"eval_recall": 0.8183760683760684, |
|
"eval_runtime": 1.7893, |
|
"eval_samples_per_second": 222.988, |
|
"eval_steps_per_second": 27.943, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.0573930740356445, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3331, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8080535642463387, |
|
"eval_loss": 0.33130019903182983, |
|
"eval_precision": 0.8232818379877204, |
|
"eval_recall": 0.796826695762866, |
|
"eval_runtime": 1.7976, |
|
"eval_samples_per_second": 221.96, |
|
"eval_steps_per_second": 27.815, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.250720500946045, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2924, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8560793854229822, |
|
"eval_loss": 0.30566585063934326, |
|
"eval_precision": 0.8609538327526132, |
|
"eval_recall": 0.8516548463356974, |
|
"eval_runtime": 1.7957, |
|
"eval_samples_per_second": 222.195, |
|
"eval_steps_per_second": 27.844, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.8374593257904053, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2705, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8422176526415692, |
|
"eval_loss": 0.3068975508213043, |
|
"eval_precision": 0.8604724566416373, |
|
"eval_recall": 0.8288325150027278, |
|
"eval_runtime": 1.7956, |
|
"eval_samples_per_second": 222.215, |
|
"eval_steps_per_second": 27.846, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 4.305149078369141, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2461, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8562182887453875, |
|
"eval_loss": 0.31193241477012634, |
|
"eval_precision": 0.8435805201992252, |
|
"eval_recall": 0.8763411529368976, |
|
"eval_runtime": 1.7944, |
|
"eval_samples_per_second": 222.356, |
|
"eval_steps_per_second": 27.864, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.4272942841053009, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2313, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8662440310793597, |
|
"eval_loss": 0.28799474239349365, |
|
"eval_precision": 0.8606158357771261, |
|
"eval_recall": 0.872704128023277, |
|
"eval_runtime": 1.8006, |
|
"eval_samples_per_second": 221.595, |
|
"eval_steps_per_second": 27.769, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.898254871368408, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2183, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8676331036823873, |
|
"eval_loss": 0.27734559774398804, |
|
"eval_precision": 0.8749292230261088, |
|
"eval_recall": 0.8612474995453718, |
|
"eval_runtime": 1.7976, |
|
"eval_samples_per_second": 221.957, |
|
"eval_steps_per_second": 27.814, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.726850509643555, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2093, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8587719298245614, |
|
"eval_loss": 0.28041473031044006, |
|
"eval_precision": 0.864771021021021, |
|
"eval_recall": 0.8534278959810875, |
|
"eval_runtime": 1.7999, |
|
"eval_samples_per_second": 221.684, |
|
"eval_steps_per_second": 27.78, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.7861063480377197, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1986, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8654532336864889, |
|
"eval_loss": 0.28901827335357666, |
|
"eval_precision": 0.8804269882659713, |
|
"eval_recall": 0.8537461356610292, |
|
"eval_runtime": 1.7942, |
|
"eval_samples_per_second": 222.384, |
|
"eval_steps_per_second": 27.868, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.18681001663208, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1881, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8629148629148629, |
|
"eval_loss": 0.29107582569122314, |
|
"eval_precision": 0.8657894736842104, |
|
"eval_recall": 0.860201854882706, |
|
"eval_runtime": 1.8131, |
|
"eval_samples_per_second": 220.068, |
|
"eval_steps_per_second": 27.577, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 3.137617588043213, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1802, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8568221901555235, |
|
"eval_loss": 0.28662246465682983, |
|
"eval_precision": 0.8596491228070176, |
|
"eval_recall": 0.8541553009638116, |
|
"eval_runtime": 1.8223, |
|
"eval_samples_per_second": 218.951, |
|
"eval_steps_per_second": 27.437, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.8551347851753235, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.169, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8564658408408408, |
|
"eval_loss": 0.2963576018810272, |
|
"eval_precision": 0.8697278911564625, |
|
"eval_recall": 0.8459265320967448, |
|
"eval_runtime": 1.8327, |
|
"eval_samples_per_second": 217.71, |
|
"eval_steps_per_second": 27.282, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 5.66387414932251, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1709, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8629148629148629, |
|
"eval_loss": 0.29438090324401855, |
|
"eval_precision": 0.8657894736842104, |
|
"eval_recall": 0.860201854882706, |
|
"eval_runtime": 1.8299, |
|
"eval_samples_per_second": 218.043, |
|
"eval_steps_per_second": 27.324, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 3.363886833190918, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1492, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8636104675452922, |
|
"eval_loss": 0.28655046224594116, |
|
"eval_precision": 0.8645363713902765, |
|
"eval_recall": 0.8627023095108202, |
|
"eval_runtime": 1.8286, |
|
"eval_samples_per_second": 218.201, |
|
"eval_steps_per_second": 27.344, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.6481318473815918, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1493, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8742647058823529, |
|
"eval_loss": 0.2950553596019745, |
|
"eval_precision": 0.8707860158154468, |
|
"eval_recall": 0.8780232769594472, |
|
"eval_runtime": 1.8286, |
|
"eval_samples_per_second": 218.205, |
|
"eval_steps_per_second": 27.344, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.7666211724281311, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1425, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8710526315789473, |
|
"eval_loss": 0.304831326007843, |
|
"eval_precision": 0.8772522522522522, |
|
"eval_recall": 0.8655210038188761, |
|
"eval_runtime": 1.7983, |
|
"eval_samples_per_second": 221.876, |
|
"eval_steps_per_second": 27.804, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.819899797439575, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1375, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8790689216221131, |
|
"eval_loss": 0.298705130815506, |
|
"eval_precision": 0.8790689216221131, |
|
"eval_recall": 0.8790689216221131, |
|
"eval_runtime": 1.8038, |
|
"eval_samples_per_second": 221.2, |
|
"eval_steps_per_second": 27.719, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 1.7430284023284912, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1326, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8778322106552358, |
|
"eval_loss": 0.30734923481941223, |
|
"eval_precision": 0.8818924438393465, |
|
"eval_recall": 0.8740680123658847, |
|
"eval_runtime": 1.8064, |
|
"eval_samples_per_second": 220.886, |
|
"eval_steps_per_second": 27.68, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.931983709335327, |
|
"learning_rate": 0.0, |
|
"loss": 0.1365, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8784574144023395, |
|
"eval_loss": 0.29978305101394653, |
|
"eval_precision": 0.8804194733619106, |
|
"eval_recall": 0.8765684669939988, |
|
"eval_runtime": 1.8111, |
|
"eval_samples_per_second": 220.313, |
|
"eval_steps_per_second": 27.608, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.2259816083751741, |
|
"train_runtime": 638.5228, |
|
"train_samples_per_second": 113.951, |
|
"train_steps_per_second": 3.821 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|