{ |
|
"best_metric": 0.8985507246376812, |
|
"best_model_checkpoint": "vit-cxr4/checkpoint-1500", |
|
"epoch": 6.0, |
|
"eval_steps": 100, |
|
"global_step": 1914, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9843260188087776e-05, |
|
"loss": 0.6092, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9686520376175547e-05, |
|
"loss": 0.4743, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9529780564263326e-05, |
|
"loss": 0.4443, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9373040752351097e-05, |
|
"loss": 0.4, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.9216300940438872e-05, |
|
"loss": 0.3793, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.9059561128526644e-05, |
|
"loss": 0.368, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.8902821316614422e-05, |
|
"loss": 0.3486, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.8746081504702197e-05, |
|
"loss": 0.3741, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.858934169278997e-05, |
|
"loss": 0.3101, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.8432601880877743e-05, |
|
"loss": 0.3151, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.8552390906715442, |
|
"eval_f1": 0.8619142572283149, |
|
"eval_loss": 0.3317149877548218, |
|
"eval_precision": 0.8151815181518152, |
|
"eval_recall": 0.9143310417768377, |
|
"eval_runtime": 111.0975, |
|
"eval_samples_per_second": 34.447, |
|
"eval_steps_per_second": 0.54, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.8275862068965518e-05, |
|
"loss": 0.3398, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.8119122257053293e-05, |
|
"loss": 0.3507, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.7962382445141068e-05, |
|
"loss": 0.3321, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.780564263322884e-05, |
|
"loss": 0.3672, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.7648902821316615e-05, |
|
"loss": 0.374, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.749216300940439e-05, |
|
"loss": 0.3301, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7335423197492164e-05, |
|
"loss": 0.2942, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.7178683385579936e-05, |
|
"loss": 0.2841, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.702194357366771e-05, |
|
"loss": 0.2873, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.686520376175549e-05, |
|
"loss": 0.319, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.8620329239613274, |
|
"eval_f1": 0.8591248665955177, |
|
"eval_loss": 0.3048080801963806, |
|
"eval_precision": 0.8669897684437264, |
|
"eval_recall": 0.8514013749338974, |
|
"eval_runtime": 111.0562, |
|
"eval_samples_per_second": 34.46, |
|
"eval_steps_per_second": 0.54, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.670846394984326e-05, |
|
"loss": 0.3004, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.6551724137931036e-05, |
|
"loss": 0.2666, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.6394984326018807e-05, |
|
"loss": 0.3005, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.6238244514106586e-05, |
|
"loss": 0.2986, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.6081504702194357e-05, |
|
"loss": 0.2848, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.5924764890282132e-05, |
|
"loss": 0.2733, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.5768025078369907e-05, |
|
"loss": 0.3, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.561128526645768e-05, |
|
"loss": 0.2949, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.5454545454545457e-05, |
|
"loss": 0.2754, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.529780564263323e-05, |
|
"loss": 0.2926, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.8630781290828325, |
|
"eval_f1": 0.8621052631578947, |
|
"eval_loss": 0.28672918677330017, |
|
"eval_precision": 0.858040859088528, |
|
"eval_recall": 0.8662083553675304, |
|
"eval_runtime": 110.1753, |
|
"eval_samples_per_second": 34.736, |
|
"eval_steps_per_second": 0.545, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.5141065830721003e-05, |
|
"loss": 0.2624, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.498432601880878e-05, |
|
"loss": 0.289, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.4827586206896553e-05, |
|
"loss": 0.2474, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.4670846394984328e-05, |
|
"loss": 0.2608, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.45141065830721e-05, |
|
"loss": 0.2675, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.4357366771159875e-05, |
|
"loss": 0.2019, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.420062695924765e-05, |
|
"loss": 0.249, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.4043887147335425e-05, |
|
"loss": 0.2448, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.38871473354232e-05, |
|
"loss": 0.2159, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.373040752351097e-05, |
|
"loss": 0.1884, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.885550039195192, |
|
"eval_f1": 0.890115403913698, |
|
"eval_loss": 0.26350638270378113, |
|
"eval_precision": 0.8467780429594272, |
|
"eval_recall": 0.938127974616605, |
|
"eval_runtime": 109.3869, |
|
"eval_samples_per_second": 34.986, |
|
"eval_steps_per_second": 0.549, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.3573667711598746e-05, |
|
"loss": 0.234, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.341692789968652e-05, |
|
"loss": 0.2151, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.3260188087774296e-05, |
|
"loss": 0.2391, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.3103448275862067e-05, |
|
"loss": 0.2644, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.2946708463949842e-05, |
|
"loss": 0.2211, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.278996865203762e-05, |
|
"loss": 0.2327, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.2633228840125392e-05, |
|
"loss": 0.2315, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.2476489028213167e-05, |
|
"loss": 0.2386, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.231974921630094e-05, |
|
"loss": 0.2363, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.2163009404388717e-05, |
|
"loss": 0.234, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_accuracy": 0.8813692187091716, |
|
"eval_f1": 0.8896451142440447, |
|
"eval_loss": 0.2639271020889282, |
|
"eval_precision": 0.8232118758434548, |
|
"eval_recall": 0.967741935483871, |
|
"eval_runtime": 110.312, |
|
"eval_samples_per_second": 34.692, |
|
"eval_steps_per_second": 0.544, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.2006269592476492e-05, |
|
"loss": 0.1994, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.1849529780564264e-05, |
|
"loss": 0.2286, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.169278996865204e-05, |
|
"loss": 0.219, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.153605015673981e-05, |
|
"loss": 0.2267, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.137931034482759e-05, |
|
"loss": 0.2032, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.122257053291536e-05, |
|
"loss": 0.2334, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.1065830721003135e-05, |
|
"loss": 0.2013, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.090909090909091e-05, |
|
"loss": 0.2145, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.0752351097178685e-05, |
|
"loss": 0.2113, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.059561128526646e-05, |
|
"loss": 0.2349, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.8873791481578259, |
|
"eval_f1": 0.8911341247789846, |
|
"eval_loss": 0.24775628745555878, |
|
"eval_precision": 0.8529980657640233, |
|
"eval_recall": 0.9328397673188789, |
|
"eval_runtime": 108.727, |
|
"eval_samples_per_second": 35.198, |
|
"eval_steps_per_second": 0.552, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.043887147335423e-05, |
|
"loss": 0.2177, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.0282131661442006e-05, |
|
"loss": 0.2257, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.012539184952978e-05, |
|
"loss": 0.2283, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9968652037617556e-05, |
|
"loss": 0.1925, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.981191222570533e-05, |
|
"loss": 0.1603, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.9655172413793102e-05, |
|
"loss": 0.1712, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.9498432601880877e-05, |
|
"loss": 0.1476, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.9341692789968652e-05, |
|
"loss": 0.1266, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.9184952978056427e-05, |
|
"loss": 0.142, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.9028213166144202e-05, |
|
"loss": 0.1476, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.8894695584008362, |
|
"eval_f1": 0.8926123381568927, |
|
"eval_loss": 0.25599205493927, |
|
"eval_precision": 0.8583984375, |
|
"eval_recall": 0.9296668429402433, |
|
"eval_runtime": 112.4052, |
|
"eval_samples_per_second": 34.046, |
|
"eval_steps_per_second": 0.534, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.8871473354231974e-05, |
|
"loss": 0.172, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.8714733542319752e-05, |
|
"loss": 0.1384, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.8557993730407524e-05, |
|
"loss": 0.1401, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.84012539184953e-05, |
|
"loss": 0.1554, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.824451410658307e-05, |
|
"loss": 0.1591, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.808777429467085e-05, |
|
"loss": 0.163, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.7931034482758623e-05, |
|
"loss": 0.1565, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.7774294670846395e-05, |
|
"loss": 0.1528, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.761755485893417e-05, |
|
"loss": 0.148, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.746081504702194e-05, |
|
"loss": 0.1289, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_accuracy": 0.8868565455970734, |
|
"eval_f1": 0.8862023653088041, |
|
"eval_loss": 0.26980680227279663, |
|
"eval_precision": 0.8808777429467085, |
|
"eval_recall": 0.8915917503966155, |
|
"eval_runtime": 109.5776, |
|
"eval_samples_per_second": 34.925, |
|
"eval_steps_per_second": 0.548, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.730407523510972e-05, |
|
"loss": 0.137, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.714733542319749e-05, |
|
"loss": 0.1528, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.6990595611285266e-05, |
|
"loss": 0.1457, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.683385579937304e-05, |
|
"loss": 0.1398, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.6677115987460816e-05, |
|
"loss": 0.1447, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.652037617554859e-05, |
|
"loss": 0.1407, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.6363636363636363e-05, |
|
"loss": 0.1421, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.6206896551724137e-05, |
|
"loss": 0.1766, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.6065830721003135e-05, |
|
"loss": 0.1507, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.590909090909091e-05, |
|
"loss": 0.1579, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.8821531225503005, |
|
"eval_f1": 0.8796370429677075, |
|
"eval_loss": 0.2614140808582306, |
|
"eval_precision": 0.8879310344827587, |
|
"eval_recall": 0.8714965626652564, |
|
"eval_runtime": 109.0275, |
|
"eval_samples_per_second": 35.101, |
|
"eval_steps_per_second": 0.55, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.5752351097178685e-05, |
|
"loss": 0.1416, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.5595611285266457e-05, |
|
"loss": 0.1502, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.543887147335423e-05, |
|
"loss": 0.1447, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.5282131661442007e-05, |
|
"loss": 0.1688, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.512539184952978e-05, |
|
"loss": 0.1594, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.4968652037617555e-05, |
|
"loss": 0.1427, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.481191222570533e-05, |
|
"loss": 0.0816, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.4655172413793105e-05, |
|
"loss": 0.0904, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.4498432601880878e-05, |
|
"loss": 0.079, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.4341692789968653e-05, |
|
"loss": 0.0745, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_accuracy": 0.8889469558400837, |
|
"eval_f1": 0.8879514895860795, |
|
"eval_loss": 0.2783428430557251, |
|
"eval_precision": 0.8853838065194533, |
|
"eval_recall": 0.8905341089370703, |
|
"eval_runtime": 108.1962, |
|
"eval_samples_per_second": 35.371, |
|
"eval_steps_per_second": 0.555, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.4184952978056426e-05, |
|
"loss": 0.082, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.4028213166144201e-05, |
|
"loss": 0.0818, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.3871473354231976e-05, |
|
"loss": 0.0689, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.371473354231975e-05, |
|
"loss": 0.0733, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.3557993730407524e-05, |
|
"loss": 0.0725, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.3401253918495299e-05, |
|
"loss": 0.0693, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.3244514106583072e-05, |
|
"loss": 0.0844, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.3087774294670845e-05, |
|
"loss": 0.08, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.293103448275862e-05, |
|
"loss": 0.0726, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.2774294670846395e-05, |
|
"loss": 0.0697, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_accuracy": 0.8899921609615887, |
|
"eval_f1": 0.8885948663667637, |
|
"eval_loss": 0.28440213203430176, |
|
"eval_precision": 0.8893008474576272, |
|
"eval_recall": 0.8878900052882073, |
|
"eval_runtime": 113.241, |
|
"eval_samples_per_second": 33.795, |
|
"eval_steps_per_second": 0.53, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.261755485893417e-05, |
|
"loss": 0.0637, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.2460815047021943e-05, |
|
"loss": 0.0866, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.2304075235109718e-05, |
|
"loss": 0.0798, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.2147335423197492e-05, |
|
"loss": 0.0826, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.1990595611285267e-05, |
|
"loss": 0.0695, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.1833855799373042e-05, |
|
"loss": 0.0508, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.1677115987460816e-05, |
|
"loss": 0.0728, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.152037617554859e-05, |
|
"loss": 0.0627, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.1363636363636365e-05, |
|
"loss": 0.0598, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.1206896551724138e-05, |
|
"loss": 0.0602, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"eval_accuracy": 0.8868565455970734, |
|
"eval_f1": 0.8863815271582262, |
|
"eval_loss": 0.3213111460208893, |
|
"eval_precision": 0.8796875, |
|
"eval_recall": 0.8931782125859333, |
|
"eval_runtime": 109.9441, |
|
"eval_samples_per_second": 34.809, |
|
"eval_steps_per_second": 0.546, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.1050156739811911e-05, |
|
"loss": 0.0743, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.0893416927899688e-05, |
|
"loss": 0.091, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.0736677115987461e-05, |
|
"loss": 0.0711, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.0595611285266457e-05, |
|
"loss": 0.0779, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.0438871473354232e-05, |
|
"loss": 0.0638, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 1.0282131661442005e-05, |
|
"loss": 0.0589, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.0125391849529782e-05, |
|
"loss": 0.076, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 9.968652037617555e-06, |
|
"loss": 0.0667, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 9.81191222570533e-06, |
|
"loss": 0.0327, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 9.655172413793103e-06, |
|
"loss": 0.0246, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_accuracy": 0.8912986673634701, |
|
"eval_f1": 0.892116182572614, |
|
"eval_loss": 0.3393436670303345, |
|
"eval_precision": 0.8753180661577609, |
|
"eval_recall": 0.9095716552088842, |
|
"eval_runtime": 109.0634, |
|
"eval_samples_per_second": 35.09, |
|
"eval_steps_per_second": 0.55, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 9.498432601880878e-06, |
|
"loss": 0.0254, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 9.341692789968651e-06, |
|
"loss": 0.0187, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 9.184952978056428e-06, |
|
"loss": 0.0288, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 9.028213166144201e-06, |
|
"loss": 0.0204, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 8.871473354231975e-06, |
|
"loss": 0.0372, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 8.71473354231975e-06, |
|
"loss": 0.0286, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 8.557993730407523e-06, |
|
"loss": 0.0217, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 8.401253918495298e-06, |
|
"loss": 0.0223, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 8.244514106583073e-06, |
|
"loss": 0.0184, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 8.087774294670848e-06, |
|
"loss": 0.0301, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_accuracy": 0.8936503788868565, |
|
"eval_f1": 0.896358543417367, |
|
"eval_loss": 0.3592565953731537, |
|
"eval_precision": 0.8644400785854617, |
|
"eval_recall": 0.9307244843997885, |
|
"eval_runtime": 109.1725, |
|
"eval_samples_per_second": 35.055, |
|
"eval_steps_per_second": 0.55, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 7.93103448275862e-06, |
|
"loss": 0.028, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 7.774294670846396e-06, |
|
"loss": 0.0221, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 7.617554858934169e-06, |
|
"loss": 0.0256, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 7.460815047021944e-06, |
|
"loss": 0.0266, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 7.304075235109718e-06, |
|
"loss": 0.0173, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 7.147335423197492e-06, |
|
"loss": 0.0228, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 6.990595611285267e-06, |
|
"loss": 0.0189, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 6.833855799373041e-06, |
|
"loss": 0.0163, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 6.677115987460815e-06, |
|
"loss": 0.0195, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 6.520376175548589e-06, |
|
"loss": 0.0348, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_accuracy": 0.8957407891298668, |
|
"eval_f1": 0.8985507246376812, |
|
"eval_loss": 0.38041359186172485, |
|
"eval_precision": 0.8653281096963761, |
|
"eval_recall": 0.9344262295081968, |
|
"eval_runtime": 109.9449, |
|
"eval_samples_per_second": 34.808, |
|
"eval_steps_per_second": 0.546, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 6.363636363636364e-06, |
|
"loss": 0.0193, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 6.206896551724138e-06, |
|
"loss": 0.0175, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 6.050156739811912e-06, |
|
"loss": 0.0244, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 5.8934169278996865e-06, |
|
"loss": 0.0395, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 5.736677115987461e-06, |
|
"loss": 0.0198, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 5.579937304075235e-06, |
|
"loss": 0.0178, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 5.42319749216301e-06, |
|
"loss": 0.0149, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 5.266457680250784e-06, |
|
"loss": 0.0261, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 5.109717868338558e-06, |
|
"loss": 0.0178, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.952978056426333e-06, |
|
"loss": 0.011, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_accuracy": 0.8946955840083617, |
|
"eval_f1": 0.8978453738910013, |
|
"eval_loss": 0.38965746760368347, |
|
"eval_precision": 0.8622200584225901, |
|
"eval_recall": 0.9365415124272871, |
|
"eval_runtime": 109.9915, |
|
"eval_samples_per_second": 34.794, |
|
"eval_steps_per_second": 0.545, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 4.796238244514107e-06, |
|
"loss": 0.0109, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 4.639498432601881e-06, |
|
"loss": 0.0092, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 4.482758620689656e-06, |
|
"loss": 0.0092, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 4.32601880877743e-06, |
|
"loss": 0.0084, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 4.169278996865204e-06, |
|
"loss": 0.0076, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 4.012539184952979e-06, |
|
"loss": 0.0056, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 3.855799373040752e-06, |
|
"loss": 0.0057, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 3.6990595611285267e-06, |
|
"loss": 0.0055, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 3.542319749216301e-06, |
|
"loss": 0.0045, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 3.3855799373040753e-06, |
|
"loss": 0.0077, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_accuracy": 0.894956885288738, |
|
"eval_f1": 0.8962312854930304, |
|
"eval_loss": 0.408812940120697, |
|
"eval_precision": 0.875441250630358, |
|
"eval_recall": 0.9180327868852459, |
|
"eval_runtime": 109.202, |
|
"eval_samples_per_second": 35.045, |
|
"eval_steps_per_second": 0.549, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 3.2288401253918494e-06, |
|
"loss": 0.0091, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 3.072100313479624e-06, |
|
"loss": 0.0032, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 2.915360501567398e-06, |
|
"loss": 0.0044, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 2.7586206896551725e-06, |
|
"loss": 0.0052, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 2.601880877742947e-06, |
|
"loss": 0.0049, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 2.445141065830721e-06, |
|
"loss": 0.006, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 2.288401253918495e-06, |
|
"loss": 0.0035, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 2.1316614420062697e-06, |
|
"loss": 0.0041, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 1.9749216300940438e-06, |
|
"loss": 0.0076, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 1.8181818181818183e-06, |
|
"loss": 0.0064, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_accuracy": 0.896002090410243, |
|
"eval_f1": 0.8970512157268494, |
|
"eval_loss": 0.4281201958656311, |
|
"eval_precision": 0.8779746835443037, |
|
"eval_recall": 0.9169751454257007, |
|
"eval_runtime": 109.7966, |
|
"eval_samples_per_second": 34.855, |
|
"eval_steps_per_second": 0.546, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 1.6614420062695926e-06, |
|
"loss": 0.004, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 1.5047021943573667e-06, |
|
"loss": 0.0041, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 1.3479623824451412e-06, |
|
"loss": 0.0038, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 1.1912225705329155e-06, |
|
"loss": 0.0044, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 1.0344827586206896e-06, |
|
"loss": 0.0024, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 8.77742946708464e-07, |
|
"loss": 0.0045, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 7.210031347962383e-07, |
|
"loss": 0.0047, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 5.642633228840126e-07, |
|
"loss": 0.0038, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 4.0752351097178683e-07, |
|
"loss": 0.0038, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 2.507836990595612e-07, |
|
"loss": 0.0031, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_accuracy": 0.894956885288738, |
|
"eval_f1": 0.8964984552008239, |
|
"eval_loss": 0.42887693643569946, |
|
"eval_precision": 0.8735574510787757, |
|
"eval_recall": 0.920676890534109, |
|
"eval_runtime": 113.2911, |
|
"eval_samples_per_second": 33.78, |
|
"eval_steps_per_second": 0.53, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 9.404388714733542e-08, |
|
"loss": 0.0047, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 1914, |
|
"total_flos": 1.4238223187805905e+19, |
|
"train_loss": 0.13623220103334482, |
|
"train_runtime": 9420.2145, |
|
"train_samples_per_second": 19.505, |
|
"train_steps_per_second": 0.203 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8923719958202717, |
|
"eval_f1": 0.8937048503611971, |
|
"eval_loss": 0.37740620970726013, |
|
"eval_precision": 0.858701041150223, |
|
"eval_recall": 0.9316837009144702, |
|
"eval_runtime": 109.2986, |
|
"eval_samples_per_second": 35.023, |
|
"eval_steps_per_second": 0.549, |
|
"step": 1914 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1914, |
|
"num_train_epochs": 6, |
|
"save_steps": 100, |
|
"total_flos": 1.4238223187805905e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |