|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.569611072540283, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5438, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7218045112781954, |
|
"eval_f1": 0.6545993371027491, |
|
"eval_loss": 0.49882158637046814, |
|
"eval_precision": 0.6600553802562947, |
|
"eval_recall": 0.6506637570467357, |
|
"eval_runtime": 5.2428, |
|
"eval_samples_per_second": 76.104, |
|
"eval_steps_per_second": 9.537, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.2452878952026367, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4428, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8161454307628278, |
|
"eval_loss": 0.3788329064846039, |
|
"eval_precision": 0.8107299270072992, |
|
"eval_recall": 0.8225586470267321, |
|
"eval_runtime": 5.1661, |
|
"eval_samples_per_second": 77.234, |
|
"eval_steps_per_second": 9.679, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.797173500061035, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3441, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8179269882659713, |
|
"eval_loss": 0.3289283514022827, |
|
"eval_precision": 0.8510239760239761, |
|
"eval_recall": 0.7981905801054737, |
|
"eval_runtime": 5.1072, |
|
"eval_samples_per_second": 78.125, |
|
"eval_steps_per_second": 9.79, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.9981327056884766, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2986, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8533986527862829, |
|
"eval_loss": 0.28838610649108887, |
|
"eval_precision": 0.8572003218020917, |
|
"eval_recall": 0.8498817966903074, |
|
"eval_runtime": 5.1209, |
|
"eval_samples_per_second": 77.916, |
|
"eval_steps_per_second": 9.764, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.763756513595581, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2667, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.850729517396184, |
|
"eval_loss": 0.26981133222579956, |
|
"eval_precision": 0.8535087719298247, |
|
"eval_recall": 0.8481087470449173, |
|
"eval_runtime": 5.1232, |
|
"eval_samples_per_second": 77.881, |
|
"eval_steps_per_second": 9.759, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.7370102405548096, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2524, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8609292598654301, |
|
"eval_loss": 0.27233511209487915, |
|
"eval_precision": 0.8609292598654301, |
|
"eval_recall": 0.8609292598654301, |
|
"eval_runtime": 5.1586, |
|
"eval_samples_per_second": 77.347, |
|
"eval_steps_per_second": 9.693, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.2413272857666016, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2343, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8265664241097301, |
|
"eval_loss": 0.3179858326911926, |
|
"eval_precision": 0.8532894736842105, |
|
"eval_recall": 0.8092380432805966, |
|
"eval_runtime": 5.1498, |
|
"eval_samples_per_second": 77.479, |
|
"eval_steps_per_second": 9.709, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.469871997833252, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2212, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.852937255424767, |
|
"eval_loss": 0.29489362239837646, |
|
"eval_precision": 0.8674217731421121, |
|
"eval_recall": 0.8416530278232406, |
|
"eval_runtime": 5.155, |
|
"eval_samples_per_second": 77.4, |
|
"eval_steps_per_second": 9.699, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.085997581481934, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2142, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8564658408408408, |
|
"eval_loss": 0.2828481197357178, |
|
"eval_precision": 0.8697278911564625, |
|
"eval_recall": 0.8459265320967448, |
|
"eval_runtime": 5.1873, |
|
"eval_samples_per_second": 76.919, |
|
"eval_steps_per_second": 9.639, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.2397539615631104, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1958, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8457993935430168, |
|
"eval_loss": 0.28871509432792664, |
|
"eval_precision": 0.8398540145985401, |
|
"eval_recall": 0.8527914166212038, |
|
"eval_runtime": 5.1373, |
|
"eval_samples_per_second": 77.667, |
|
"eval_steps_per_second": 9.733, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.4002835750579834, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1855, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8602993213495533, |
|
"eval_loss": 0.2867955267429352, |
|
"eval_precision": 0.8547653958944281, |
|
"eval_recall": 0.8666575741043827, |
|
"eval_runtime": 5.1317, |
|
"eval_samples_per_second": 77.752, |
|
"eval_steps_per_second": 9.743, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.633034348487854, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1742, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8448388501742161, |
|
"eval_loss": 0.29811105132102966, |
|
"eval_precision": 0.8551721930610677, |
|
"eval_recall": 0.8363338788870704, |
|
"eval_runtime": 5.1494, |
|
"eval_samples_per_second": 77.484, |
|
"eval_steps_per_second": 9.71, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 2.338294506072998, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1601, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8556004584112431, |
|
"eval_loss": 0.29304954409599304, |
|
"eval_precision": 0.8538865546218487, |
|
"eval_recall": 0.85738316057465, |
|
"eval_runtime": 5.0982, |
|
"eval_samples_per_second": 78.263, |
|
"eval_steps_per_second": 9.807, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 1.3382197618484497, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1602, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8589607635206786, |
|
"eval_loss": 0.29793980717658997, |
|
"eval_precision": 0.8503875968992248, |
|
"eval_recall": 0.8698854337152209, |
|
"eval_runtime": 5.1508, |
|
"eval_samples_per_second": 77.464, |
|
"eval_steps_per_second": 9.707, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.5800030827522278, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1497, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8662440310793597, |
|
"eval_loss": 0.29690659046173096, |
|
"eval_precision": 0.8606158357771261, |
|
"eval_recall": 0.872704128023277, |
|
"eval_runtime": 5.1461, |
|
"eval_samples_per_second": 77.534, |
|
"eval_steps_per_second": 9.716, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.755856037139893, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1447, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8616171059774413, |
|
"eval_loss": 0.29627636075019836, |
|
"eval_precision": 0.859873949579832, |
|
"eval_recall": 0.8634297144935443, |
|
"eval_runtime": 5.1639, |
|
"eval_samples_per_second": 77.267, |
|
"eval_steps_per_second": 9.683, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 2.1538662910461426, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1394, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8589543987905864, |
|
"eval_loss": 0.30176377296447754, |
|
"eval_precision": 0.8564068100358423, |
|
"eval_recall": 0.8616566648481543, |
|
"eval_runtime": 5.17, |
|
"eval_samples_per_second": 77.176, |
|
"eval_steps_per_second": 9.671, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 2.54630708694458, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1333, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.858259325044405, |
|
"eval_loss": 0.30650317668914795, |
|
"eval_precision": 0.8573798178418481, |
|
"eval_recall": 0.8591562102200401, |
|
"eval_runtime": 5.128, |
|
"eval_samples_per_second": 77.807, |
|
"eval_steps_per_second": 9.75, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.7696042656898499, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1406, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.858259325044405, |
|
"eval_loss": 0.30623340606689453, |
|
"eval_precision": 0.8573798178418481, |
|
"eval_recall": 0.8591562102200401, |
|
"eval_runtime": 5.1122, |
|
"eval_samples_per_second": 78.049, |
|
"eval_steps_per_second": 9.781, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.025254487991333, |
|
"learning_rate": 0.0, |
|
"loss": 0.1243, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.858259325044405, |
|
"eval_loss": 0.30716511607170105, |
|
"eval_precision": 0.8573798178418481, |
|
"eval_recall": 0.8591562102200401, |
|
"eval_runtime": 5.0928, |
|
"eval_samples_per_second": 78.347, |
|
"eval_steps_per_second": 9.818, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8460375995160000.0, |
|
"train_loss": 0.22629446436147221, |
|
"train_runtime": 1956.0503, |
|
"train_samples_per_second": 37.269, |
|
"train_steps_per_second": 1.247 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8460375995160000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|