|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.928847551345825, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5514, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7218045112781954, |
|
"eval_f1": 0.6545993371027491, |
|
"eval_loss": 0.5084339380264282, |
|
"eval_precision": 0.6600553802562947, |
|
"eval_recall": 0.6506637570467357, |
|
"eval_runtime": 1.7877, |
|
"eval_samples_per_second": 223.186, |
|
"eval_steps_per_second": 27.968, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.589020013809204, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4753, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8170426065162907, |
|
"eval_f1": 0.7662453352594198, |
|
"eval_loss": 0.40073099732398987, |
|
"eval_precision": 0.7870255775577557, |
|
"eval_recall": 0.7530460083651573, |
|
"eval_runtime": 1.8277, |
|
"eval_samples_per_second": 218.302, |
|
"eval_steps_per_second": 27.356, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 7.217240810394287, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3834, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.7805280528052805, |
|
"eval_loss": 0.35415172576904297, |
|
"eval_precision": 0.8448765432098766, |
|
"eval_recall": 0.7540007274049827, |
|
"eval_runtime": 1.8275, |
|
"eval_samples_per_second": 218.333, |
|
"eval_steps_per_second": 27.36, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.0342143774032593, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3188, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8333016825553572, |
|
"eval_loss": 0.32140952348709106, |
|
"eval_precision": 0.8341507249908615, |
|
"eval_recall": 0.8324695399163484, |
|
"eval_runtime": 1.8265, |
|
"eval_samples_per_second": 218.445, |
|
"eval_steps_per_second": 27.374, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.17711341381073, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2981, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.855319904024935, |
|
"eval_loss": 0.2984267771244049, |
|
"eval_precision": 0.862378106322743, |
|
"eval_recall": 0.8491543917075832, |
|
"eval_runtime": 1.8288, |
|
"eval_samples_per_second": 218.18, |
|
"eval_steps_per_second": 27.341, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 4.153679370880127, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2835, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8368354828562441, |
|
"eval_loss": 0.28104665875434875, |
|
"eval_precision": 0.8520237470480189, |
|
"eval_recall": 0.8252864157119476, |
|
"eval_runtime": 1.8275, |
|
"eval_samples_per_second": 218.335, |
|
"eval_steps_per_second": 27.36, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.0657362937927246, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2517, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8622036668943447, |
|
"eval_loss": 0.28660351037979126, |
|
"eval_precision": 0.8671602787456446, |
|
"eval_recall": 0.8577014002545917, |
|
"eval_runtime": 1.7903, |
|
"eval_samples_per_second": 222.862, |
|
"eval_steps_per_second": 27.928, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.21960711479187, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2374, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8485289465359063, |
|
"eval_loss": 0.2996794879436493, |
|
"eval_precision": 0.8671008040401356, |
|
"eval_recall": 0.8348790689216221, |
|
"eval_runtime": 1.7958, |
|
"eval_samples_per_second": 222.185, |
|
"eval_steps_per_second": 27.843, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.313467025756836, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2293, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8518472677764712, |
|
"eval_loss": 0.29089975357055664, |
|
"eval_precision": 0.8599810186649794, |
|
"eval_recall": 0.844880887434079, |
|
"eval_runtime": 1.7949, |
|
"eval_samples_per_second": 222.299, |
|
"eval_steps_per_second": 27.857, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.5997090339660645, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2091, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8589543987905864, |
|
"eval_loss": 0.29276683926582336, |
|
"eval_precision": 0.8564068100358423, |
|
"eval_recall": 0.8616566648481543, |
|
"eval_runtime": 1.7944, |
|
"eval_samples_per_second": 222.357, |
|
"eval_steps_per_second": 27.864, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.3290369510650635, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.198, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8569892473118279, |
|
"eval_loss": 0.28468698263168335, |
|
"eval_precision": 0.8522004241781549, |
|
"eval_recall": 0.8623840698308783, |
|
"eval_runtime": 1.8002, |
|
"eval_samples_per_second": 221.645, |
|
"eval_steps_per_second": 27.775, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 5.568458557128906, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1906, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.843111041207927, |
|
"eval_loss": 0.31200090050697327, |
|
"eval_precision": 0.8585673051692468, |
|
"eval_recall": 0.831332969630842, |
|
"eval_runtime": 1.7936, |
|
"eval_samples_per_second": 222.458, |
|
"eval_steps_per_second": 27.877, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.5475130081176758, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1818, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.850729517396184, |
|
"eval_loss": 0.2906310558319092, |
|
"eval_precision": 0.8535087719298247, |
|
"eval_recall": 0.8481087470449173, |
|
"eval_runtime": 1.795, |
|
"eval_samples_per_second": 222.279, |
|
"eval_steps_per_second": 27.854, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 5.56436014175415, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1756, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8514869535493182, |
|
"eval_loss": 0.28101998567581177, |
|
"eval_precision": 0.8523821128305106, |
|
"eval_recall": 0.8506092016730314, |
|
"eval_runtime": 1.7948, |
|
"eval_samples_per_second": 222.311, |
|
"eval_steps_per_second": 27.859, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.7677656412124634, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.174, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8595070422535211, |
|
"eval_loss": 0.2828814685344696, |
|
"eval_precision": 0.8633733523114054, |
|
"eval_recall": 0.8559283506092017, |
|
"eval_runtime": 1.7987, |
|
"eval_samples_per_second": 221.825, |
|
"eval_steps_per_second": 27.798, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.1199889183044434, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1705, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.855319904024935, |
|
"eval_loss": 0.292202889919281, |
|
"eval_precision": 0.862378106322743, |
|
"eval_recall": 0.8491543917075832, |
|
"eval_runtime": 1.7972, |
|
"eval_samples_per_second": 222.011, |
|
"eval_steps_per_second": 27.821, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 2.521127223968506, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1509, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8568221901555235, |
|
"eval_loss": 0.2991296947002411, |
|
"eval_precision": 0.8596491228070176, |
|
"eval_recall": 0.8541553009638116, |
|
"eval_runtime": 1.7965, |
|
"eval_samples_per_second": 222.104, |
|
"eval_steps_per_second": 27.833, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.4539577960968018, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1549, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.855319904024935, |
|
"eval_loss": 0.3000391125679016, |
|
"eval_precision": 0.862378106322743, |
|
"eval_recall": 0.8491543917075832, |
|
"eval_runtime": 1.7991, |
|
"eval_samples_per_second": 221.772, |
|
"eval_steps_per_second": 27.791, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 1.3508776426315308, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1469, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8609292598654301, |
|
"eval_loss": 0.2942558228969574, |
|
"eval_precision": 0.8609292598654301, |
|
"eval_recall": 0.8609292598654301, |
|
"eval_runtime": 1.8067, |
|
"eval_samples_per_second": 220.841, |
|
"eval_steps_per_second": 27.674, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.1654489040374756, |
|
"learning_rate": 0.0, |
|
"loss": 0.1493, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.855319904024935, |
|
"eval_loss": 0.3026413023471832, |
|
"eval_precision": 0.862378106322743, |
|
"eval_recall": 0.8491543917075832, |
|
"eval_runtime": 1.7968, |
|
"eval_samples_per_second": 222.067, |
|
"eval_steps_per_second": 27.828, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8460375995160000.0, |
|
"train_loss": 0.24652244458433056, |
|
"train_runtime": 623.7969, |
|
"train_samples_per_second": 116.865, |
|
"train_steps_per_second": 3.912 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8460375995160000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|