|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.4620718955993652, |
|
"learning_rate": 4.75e-05, |
|
"loss": 1.2537, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8365739601279842, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.7370312809944153, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.0987, |
|
"eval_samples_per_second": 170.195, |
|
"eval_steps_per_second": 2.73, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.4634525775909424, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.7093, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8373123307900566, |
|
"eval_f1": 0.0056022408963585435, |
|
"eval_loss": 0.6298087239265442, |
|
"eval_precision": 0.16666666666666666, |
|
"eval_recall": 0.002849002849002849, |
|
"eval_runtime": 0.9814, |
|
"eval_samples_per_second": 190.541, |
|
"eval_steps_per_second": 3.057, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.2972607612609863, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.6232, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8417425547624908, |
|
"eval_f1": 0.032171581769437, |
|
"eval_loss": 0.5443252325057983, |
|
"eval_precision": 0.2727272727272727, |
|
"eval_recall": 0.017094017094017096, |
|
"eval_runtime": 1.0287, |
|
"eval_samples_per_second": 181.785, |
|
"eval_steps_per_second": 2.916, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.2424334287643433, |
|
"learning_rate": 4e-05, |
|
"loss": 0.5363, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8624169333005168, |
|
"eval_f1": 0.14977973568281938, |
|
"eval_loss": 0.45594143867492676, |
|
"eval_precision": 0.3300970873786408, |
|
"eval_recall": 0.09686609686609686, |
|
"eval_runtime": 0.9669, |
|
"eval_samples_per_second": 193.397, |
|
"eval_steps_per_second": 3.103, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.3577224016189575, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4591, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8929362539995077, |
|
"eval_f1": 0.3604240282685513, |
|
"eval_loss": 0.3863191306591034, |
|
"eval_precision": 0.4744186046511628, |
|
"eval_recall": 0.2905982905982906, |
|
"eval_runtime": 0.9477, |
|
"eval_samples_per_second": 197.315, |
|
"eval_steps_per_second": 3.165, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.2191588878631592, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.387, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9190253507260645, |
|
"eval_f1": 0.5714285714285715, |
|
"eval_loss": 0.32724133133888245, |
|
"eval_precision": 0.5789473684210527, |
|
"eval_recall": 0.5641025641025641, |
|
"eval_runtime": 0.9315, |
|
"eval_samples_per_second": 200.744, |
|
"eval_steps_per_second": 3.22, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.1846706867218018, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3252, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9291164164410534, |
|
"eval_f1": 0.6436170212765958, |
|
"eval_loss": 0.2810536026954651, |
|
"eval_precision": 0.6034912718204489, |
|
"eval_recall": 0.6894586894586895, |
|
"eval_runtime": 0.9504, |
|
"eval_samples_per_second": 196.764, |
|
"eval_steps_per_second": 3.157, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.0833159685134888, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2874, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9313315284272705, |
|
"eval_f1": 0.655217965653897, |
|
"eval_loss": 0.24546079337596893, |
|
"eval_precision": 0.6108374384236454, |
|
"eval_recall": 0.7065527065527065, |
|
"eval_runtime": 0.9612, |
|
"eval_samples_per_second": 194.541, |
|
"eval_steps_per_second": 3.121, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.9407267570495605, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2588, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9333005168594635, |
|
"eval_f1": 0.6761290322580644, |
|
"eval_loss": 0.22847984731197357, |
|
"eval_precision": 0.6179245283018868, |
|
"eval_recall": 0.7464387464387464, |
|
"eval_runtime": 0.9716, |
|
"eval_samples_per_second": 192.466, |
|
"eval_steps_per_second": 3.088, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.7366420030593872, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2393, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9362539995077529, |
|
"eval_f1": 0.6975546975546976, |
|
"eval_loss": 0.21532489359378815, |
|
"eval_precision": 0.636150234741784, |
|
"eval_recall": 0.7720797720797721, |
|
"eval_runtime": 0.9655, |
|
"eval_samples_per_second": 193.678, |
|
"eval_steps_per_second": 3.107, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.1416951417922974, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.224, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9387152350479941, |
|
"eval_f1": 0.7030456852791879, |
|
"eval_loss": 0.2062499076128006, |
|
"eval_precision": 0.6338672768878718, |
|
"eval_recall": 0.7891737891737892, |
|
"eval_runtime": 0.9619, |
|
"eval_samples_per_second": 194.401, |
|
"eval_steps_per_second": 3.119, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.7414947748184204, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2137, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9387152350479941, |
|
"eval_f1": 0.7135549872122762, |
|
"eval_loss": 0.20024912059307098, |
|
"eval_precision": 0.6473317865429234, |
|
"eval_recall": 0.7948717948717948, |
|
"eval_runtime": 0.9544, |
|
"eval_samples_per_second": 195.928, |
|
"eval_steps_per_second": 3.143, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.186489224433899, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2052, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9424070883583558, |
|
"eval_f1": 0.7218628719275549, |
|
"eval_loss": 0.18892288208007812, |
|
"eval_precision": 0.6611374407582938, |
|
"eval_recall": 0.7948717948717948, |
|
"eval_runtime": 0.9502, |
|
"eval_samples_per_second": 196.793, |
|
"eval_steps_per_second": 3.157, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 1.4163442850112915, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2039, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9431454590204282, |
|
"eval_f1": 0.7312661498708009, |
|
"eval_loss": 0.18623687326908112, |
|
"eval_precision": 0.6690307328605201, |
|
"eval_recall": 0.8062678062678063, |
|
"eval_runtime": 0.9553, |
|
"eval_samples_per_second": 195.753, |
|
"eval_steps_per_second": 3.14, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.5289697647094727, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1975, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9431454590204282, |
|
"eval_f1": 0.7319587628865979, |
|
"eval_loss": 0.1867983341217041, |
|
"eval_precision": 0.668235294117647, |
|
"eval_recall": 0.8091168091168092, |
|
"eval_runtime": 0.9597, |
|
"eval_samples_per_second": 194.853, |
|
"eval_steps_per_second": 3.126, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.371168375015259, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1936, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.94265321191238, |
|
"eval_f1": 0.7321428571428572, |
|
"eval_loss": 0.18374690413475037, |
|
"eval_precision": 0.6628175519630485, |
|
"eval_recall": 0.8176638176638177, |
|
"eval_runtime": 0.9485, |
|
"eval_samples_per_second": 197.152, |
|
"eval_steps_per_second": 3.163, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.7330523133277893, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1908, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.94265321191238, |
|
"eval_f1": 0.7305236270753512, |
|
"eval_loss": 0.18250302970409393, |
|
"eval_precision": 0.6620370370370371, |
|
"eval_recall": 0.8148148148148148, |
|
"eval_runtime": 0.9449, |
|
"eval_samples_per_second": 197.907, |
|
"eval_steps_per_second": 3.175, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.4619590044021606, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1885, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9431454590204282, |
|
"eval_f1": 0.7270408163265305, |
|
"eval_loss": 0.1805543154478073, |
|
"eval_precision": 0.6581986143187067, |
|
"eval_recall": 0.811965811965812, |
|
"eval_runtime": 0.95, |
|
"eval_samples_per_second": 196.84, |
|
"eval_steps_per_second": 3.158, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.7237643003463745, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1877, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9431454590204282, |
|
"eval_f1": 0.7247119078104994, |
|
"eval_loss": 0.17826727032661438, |
|
"eval_precision": 0.6581395348837209, |
|
"eval_recall": 0.8062678062678063, |
|
"eval_runtime": 0.9528, |
|
"eval_samples_per_second": 196.268, |
|
"eval_steps_per_second": 3.149, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.3640408515930176, |
|
"learning_rate": 0.0, |
|
"loss": 0.1858, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9433915825744523, |
|
"eval_f1": 0.7279693486590039, |
|
"eval_loss": 0.1783868670463562, |
|
"eval_precision": 0.6597222222222222, |
|
"eval_recall": 0.811965811965812, |
|
"eval_runtime": 0.9505, |
|
"eval_samples_per_second": 196.741, |
|
"eval_steps_per_second": 3.156, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2120, |
|
"total_flos": 907028676246000.0, |
|
"train_loss": 0.35348991987840184, |
|
"train_runtime": 247.3863, |
|
"train_samples_per_second": 136.467, |
|
"train_steps_per_second": 8.57 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 907028676246000.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|