|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.7648299932479858, |
|
"learning_rate": 4.75e-05, |
|
"loss": 1.1483, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8448836627470603, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.6899715065956116, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 0.8894, |
|
"eval_samples_per_second": 210.248, |
|
"eval_steps_per_second": 3.373, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.372132420539856, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.6875, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8463847885914436, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.5737118721008301, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 0.926, |
|
"eval_samples_per_second": 201.946, |
|
"eval_steps_per_second": 3.24, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.2157526016235352, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.5874, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8633975481611208, |
|
"eval_f1": 0.10071942446043167, |
|
"eval_loss": 0.4661254584789276, |
|
"eval_precision": 0.2692307692307692, |
|
"eval_recall": 0.061946902654867256, |
|
"eval_runtime": 0.9773, |
|
"eval_samples_per_second": 191.347, |
|
"eval_steps_per_second": 3.07, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.093184232711792, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4729, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.898173630222667, |
|
"eval_f1": 0.37722419928825623, |
|
"eval_loss": 0.3599094748497009, |
|
"eval_precision": 0.47533632286995514, |
|
"eval_recall": 0.31268436578171094, |
|
"eval_runtime": 0.9352, |
|
"eval_samples_per_second": 199.961, |
|
"eval_steps_per_second": 3.208, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 4.716677665710449, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3692, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9246935201401051, |
|
"eval_f1": 0.5917496443812232, |
|
"eval_loss": 0.29404327273368835, |
|
"eval_precision": 0.5714285714285714, |
|
"eval_recall": 0.6135693215339233, |
|
"eval_runtime": 0.899, |
|
"eval_samples_per_second": 208.018, |
|
"eval_steps_per_second": 3.337, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.1444728374481201, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3058, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9334500875656743, |
|
"eval_f1": 0.6621621621621622, |
|
"eval_loss": 0.2527407705783844, |
|
"eval_precision": 0.6109725685785536, |
|
"eval_recall": 0.7227138643067846, |
|
"eval_runtime": 0.8914, |
|
"eval_samples_per_second": 209.792, |
|
"eval_steps_per_second": 3.366, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.998134434223175, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2636, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.937453089817363, |
|
"eval_f1": 0.6954177897574124, |
|
"eval_loss": 0.22462092339992523, |
|
"eval_precision": 0.6401985111662531, |
|
"eval_recall": 0.7610619469026548, |
|
"eval_runtime": 0.9047, |
|
"eval_samples_per_second": 206.701, |
|
"eval_steps_per_second": 3.316, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.480947732925415, |
|
"learning_rate": 3e-05, |
|
"loss": 0.24, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9417062797097824, |
|
"eval_f1": 0.7241379310344828, |
|
"eval_loss": 0.20909550786018372, |
|
"eval_precision": 0.6578313253012048, |
|
"eval_recall": 0.8053097345132744, |
|
"eval_runtime": 0.9189, |
|
"eval_samples_per_second": 203.503, |
|
"eval_steps_per_second": 3.265, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.5677202939987183, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2228, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.940205153865399, |
|
"eval_f1": 0.716883116883117, |
|
"eval_loss": 0.1985715925693512, |
|
"eval_precision": 0.6403712296983759, |
|
"eval_recall": 0.8141592920353983, |
|
"eval_runtime": 0.9225, |
|
"eval_samples_per_second": 202.716, |
|
"eval_steps_per_second": 3.252, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.242694854736328, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2105, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9417062797097824, |
|
"eval_f1": 0.733245729303548, |
|
"eval_loss": 0.1821284145116806, |
|
"eval_precision": 0.6611374407582938, |
|
"eval_recall": 0.8230088495575221, |
|
"eval_runtime": 0.925, |
|
"eval_samples_per_second": 202.159, |
|
"eval_steps_per_second": 3.243, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.5337995290756226, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2007, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9432074055541656, |
|
"eval_f1": 0.7394736842105263, |
|
"eval_loss": 0.17939399182796478, |
|
"eval_precision": 0.667458432304038, |
|
"eval_recall": 0.8289085545722714, |
|
"eval_runtime": 0.9211, |
|
"eval_samples_per_second": 203.028, |
|
"eval_steps_per_second": 3.257, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 2.7884016036987305, |
|
"learning_rate": 2e-05, |
|
"loss": 0.195, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9429572179134351, |
|
"eval_f1": 0.7392996108949417, |
|
"eval_loss": 0.18079817295074463, |
|
"eval_precision": 0.6597222222222222, |
|
"eval_recall": 0.8407079646017699, |
|
"eval_runtime": 0.9116, |
|
"eval_samples_per_second": 205.139, |
|
"eval_steps_per_second": 3.291, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.4956753253936768, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.19, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9459594696022017, |
|
"eval_f1": 0.7463479415670652, |
|
"eval_loss": 0.16896921396255493, |
|
"eval_precision": 0.678743961352657, |
|
"eval_recall": 0.8289085545722714, |
|
"eval_runtime": 0.9045, |
|
"eval_samples_per_second": 206.741, |
|
"eval_steps_per_second": 3.317, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 1.4634425640106201, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1835, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9477107830873155, |
|
"eval_f1": 0.751336898395722, |
|
"eval_loss": 0.16314250230789185, |
|
"eval_precision": 0.687041564792176, |
|
"eval_recall": 0.8289085545722714, |
|
"eval_runtime": 0.9091, |
|
"eval_samples_per_second": 205.694, |
|
"eval_steps_per_second": 3.3, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 2.0593953132629395, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1821, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9472104078058544, |
|
"eval_f1": 0.753968253968254, |
|
"eval_loss": 0.16711552441120148, |
|
"eval_precision": 0.6834532374100719, |
|
"eval_recall": 0.8407079646017699, |
|
"eval_runtime": 0.9129, |
|
"eval_samples_per_second": 204.846, |
|
"eval_steps_per_second": 3.286, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.1103074550628662, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1774, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9472104078058544, |
|
"eval_f1": 0.7647831800262812, |
|
"eval_loss": 0.16675202548503876, |
|
"eval_precision": 0.6895734597156398, |
|
"eval_recall": 0.8584070796460177, |
|
"eval_runtime": 0.913, |
|
"eval_samples_per_second": 204.819, |
|
"eval_steps_per_second": 3.286, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 2.5177829265594482, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1764, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9477107830873155, |
|
"eval_f1": 0.7602649006622517, |
|
"eval_loss": 0.16347847878932953, |
|
"eval_precision": 0.6899038461538461, |
|
"eval_recall": 0.8466076696165191, |
|
"eval_runtime": 0.9243, |
|
"eval_samples_per_second": 202.326, |
|
"eval_steps_per_second": 3.246, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.8699701428413391, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1729, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9472104078058544, |
|
"eval_f1": 0.7611548556430445, |
|
"eval_loss": 0.16536261141300201, |
|
"eval_precision": 0.6855791962174941, |
|
"eval_recall": 0.855457227138643, |
|
"eval_runtime": 0.9101, |
|
"eval_samples_per_second": 205.482, |
|
"eval_steps_per_second": 3.297, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 2.786510705947876, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1726, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9477107830873155, |
|
"eval_f1": 0.7621550591327202, |
|
"eval_loss": 0.16280074417591095, |
|
"eval_precision": 0.6872037914691943, |
|
"eval_recall": 0.855457227138643, |
|
"eval_runtime": 0.9163, |
|
"eval_samples_per_second": 204.074, |
|
"eval_steps_per_second": 3.274, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.0649765729904175, |
|
"learning_rate": 0.0, |
|
"loss": 0.1684, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9472104078058544, |
|
"eval_f1": 0.7595269382391592, |
|
"eval_loss": 0.16256052255630493, |
|
"eval_precision": 0.6848341232227488, |
|
"eval_recall": 0.8525073746312685, |
|
"eval_runtime": 0.9134, |
|
"eval_samples_per_second": 204.735, |
|
"eval_steps_per_second": 3.285, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2120, |
|
"total_flos": 904262544144960.0, |
|
"train_loss": 0.3163445589677343, |
|
"train_runtime": 244.3626, |
|
"train_samples_per_second": 138.074, |
|
"train_steps_per_second": 8.676 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 904262544144960.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|