|
{ |
|
"best_metric": 0.8048780487804879, |
|
"best_model_checkpoint": "MAE-CT-CPC-Dicotomized-v4-early-stop/checkpoint-648", |
|
"epoch": 15.05625, |
|
"eval_steps": 500, |
|
"global_step": 1296, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.944444444444446e-07, |
|
"loss": 0.5724, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.3888888888888892e-06, |
|
"loss": 0.633, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 0.5686, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 0.605, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 0.7115, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.6218, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.861111111111111e-06, |
|
"loss": 0.6258, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.5342, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.6829268292682927, |
|
"eval_loss": 0.6496201753616333, |
|
"eval_runtime": 8.1028, |
|
"eval_samples_per_second": 5.06, |
|
"eval_steps_per_second": 1.358, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.6638, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.5117, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 7.638888888888888e-06, |
|
"loss": 0.6552, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.5231, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.027777777777779e-06, |
|
"loss": 0.6073, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.6471, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.953703703703704e-06, |
|
"loss": 0.6375, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.876543209876543e-06, |
|
"loss": 0.6266, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.6829268292682927, |
|
"eval_loss": 0.5853927135467529, |
|
"eval_runtime": 8.0931, |
|
"eval_samples_per_second": 5.066, |
|
"eval_steps_per_second": 1.359, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.799382716049384e-06, |
|
"loss": 0.5042, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.4928, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.645061728395062e-06, |
|
"loss": 0.5231, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.567901234567902e-06, |
|
"loss": 0.5299, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.490740740740741e-06, |
|
"loss": 0.6126, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.413580246913581e-06, |
|
"loss": 0.4196, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.33641975308642e-06, |
|
"loss": 0.9175, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.25925925925926e-06, |
|
"loss": 0.6599, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_accuracy": 0.6829268292682927, |
|
"eval_loss": 0.5205796957015991, |
|
"eval_runtime": 8.4167, |
|
"eval_samples_per_second": 4.871, |
|
"eval_steps_per_second": 1.307, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.1820987654321e-06, |
|
"loss": 0.5163, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 9.10493827160494e-06, |
|
"loss": 0.6428, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 9.027777777777779e-06, |
|
"loss": 0.5963, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 8.950617283950618e-06, |
|
"loss": 0.5579, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 8.873456790123458e-06, |
|
"loss": 0.6436, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 8.796296296296297e-06, |
|
"loss": 0.6368, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 8.719135802469136e-06, |
|
"loss": 0.5194, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 8.641975308641975e-06, |
|
"loss": 0.877, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_accuracy": 0.6097560975609756, |
|
"eval_loss": 0.5995270013809204, |
|
"eval_runtime": 8.3891, |
|
"eval_samples_per_second": 4.887, |
|
"eval_steps_per_second": 1.311, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.564814814814816e-06, |
|
"loss": 0.5644, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 8.487654320987654e-06, |
|
"loss": 0.4776, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 8.410493827160495e-06, |
|
"loss": 0.5117, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.4388, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 8.256172839506174e-06, |
|
"loss": 0.5065, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 8.179012345679013e-06, |
|
"loss": 0.5147, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 8.101851851851854e-06, |
|
"loss": 0.6924, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 8.024691358024692e-06, |
|
"loss": 0.653, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_accuracy": 0.7560975609756098, |
|
"eval_loss": 0.4907689392566681, |
|
"eval_runtime": 7.7719, |
|
"eval_samples_per_second": 5.275, |
|
"eval_steps_per_second": 1.415, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 7.947530864197531e-06, |
|
"loss": 0.536, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 7.870370370370372e-06, |
|
"loss": 0.4254, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 7.79320987654321e-06, |
|
"loss": 0.4454, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 7.71604938271605e-06, |
|
"loss": 0.4594, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 7.638888888888888e-06, |
|
"loss": 0.4811, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 7.561728395061729e-06, |
|
"loss": 0.4024, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 7.484567901234569e-06, |
|
"loss": 0.3778, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.7604, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"eval_accuracy": 0.7804878048780488, |
|
"eval_loss": 0.49357670545578003, |
|
"eval_runtime": 8.1369, |
|
"eval_samples_per_second": 5.039, |
|
"eval_steps_per_second": 1.352, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 7.330246913580248e-06, |
|
"loss": 0.5151, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 7.253086419753087e-06, |
|
"loss": 0.3392, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 7.1759259259259266e-06, |
|
"loss": 0.5844, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 7.098765432098766e-06, |
|
"loss": 0.5688, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 7.021604938271606e-06, |
|
"loss": 0.247, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.7517, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 6.867283950617285e-06, |
|
"loss": 0.3608, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 6.790123456790124e-06, |
|
"loss": 0.4795, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_accuracy": 0.6829268292682927, |
|
"eval_loss": 0.9527755379676819, |
|
"eval_runtime": 8.3829, |
|
"eval_samples_per_second": 4.891, |
|
"eval_steps_per_second": 1.312, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 6.712962962962963e-06, |
|
"loss": 0.3914, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 6.635802469135803e-06, |
|
"loss": 0.4056, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 6.558641975308642e-06, |
|
"loss": 0.4884, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 6.481481481481482e-06, |
|
"loss": 0.3237, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 6.404320987654321e-06, |
|
"loss": 0.4742, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 6.3271604938271615e-06, |
|
"loss": 0.4839, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.265, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 6.17283950617284e-06, |
|
"loss": 0.278, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"eval_accuracy": 0.8048780487804879, |
|
"eval_loss": 0.5564919710159302, |
|
"eval_runtime": 8.4164, |
|
"eval_samples_per_second": 4.871, |
|
"eval_steps_per_second": 1.307, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6.09567901234568e-06, |
|
"loss": 0.1885, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 6.018518518518519e-06, |
|
"loss": 0.2307, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 5.941358024691358e-06, |
|
"loss": 0.4786, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 5.864197530864199e-06, |
|
"loss": 0.3708, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 5.787037037037038e-06, |
|
"loss": 0.5062, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 5.7098765432098764e-06, |
|
"loss": 0.7825, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 5.632716049382716e-06, |
|
"loss": 0.1902, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.3548, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_accuracy": 0.7560975609756098, |
|
"eval_loss": 0.5855191946029663, |
|
"eval_runtime": 7.7883, |
|
"eval_samples_per_second": 5.264, |
|
"eval_steps_per_second": 1.412, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.4783950617283955e-06, |
|
"loss": 0.2654, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 5.401234567901234e-06, |
|
"loss": 0.2532, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 5.324074074074075e-06, |
|
"loss": 0.243, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 5.246913580246914e-06, |
|
"loss": 0.4346, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 5.1697530864197534e-06, |
|
"loss": 0.1327, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 5.092592592592593e-06, |
|
"loss": 0.3484, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 5.015432098765433e-06, |
|
"loss": 0.4213, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 4.938271604938272e-06, |
|
"loss": 0.3673, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 4.861111111111111e-06, |
|
"loss": 0.4386, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_accuracy": 0.7560975609756098, |
|
"eval_loss": 0.6577650904655457, |
|
"eval_runtime": 8.1212, |
|
"eval_samples_per_second": 5.049, |
|
"eval_steps_per_second": 1.354, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.783950617283951e-06, |
|
"loss": 0.5757, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 4.706790123456791e-06, |
|
"loss": 0.2701, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 4.62962962962963e-06, |
|
"loss": 0.1286, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 4.55246913580247e-06, |
|
"loss": 0.2345, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 4.475308641975309e-06, |
|
"loss": 0.1286, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 4.398148148148149e-06, |
|
"loss": 0.3579, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 4.3209876543209875e-06, |
|
"loss": 0.408, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 4.243827160493827e-06, |
|
"loss": 0.3007, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"eval_accuracy": 0.7804878048780488, |
|
"eval_loss": 0.6622430682182312, |
|
"eval_runtime": 8.0725, |
|
"eval_samples_per_second": 5.079, |
|
"eval_steps_per_second": 1.363, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.4887, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.0895061728395066e-06, |
|
"loss": 0.1813, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 4.012345679012346e-06, |
|
"loss": 0.1648, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 3.935185185185186e-06, |
|
"loss": 0.4133, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 3.858024691358025e-06, |
|
"loss": 0.1691, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 3.7808641975308645e-06, |
|
"loss": 0.1626, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.2215, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 3.6265432098765434e-06, |
|
"loss": 0.313, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"eval_accuracy": 0.7560975609756098, |
|
"eval_loss": 0.8349580764770508, |
|
"eval_runtime": 8.0234, |
|
"eval_samples_per_second": 5.11, |
|
"eval_steps_per_second": 1.371, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 3.549382716049383e-06, |
|
"loss": 0.4007, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 0.2461, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 3.395061728395062e-06, |
|
"loss": 0.2541, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 3.3179012345679013e-06, |
|
"loss": 0.3941, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 3.240740740740741e-06, |
|
"loss": 0.0845, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"learning_rate": 3.1635802469135807e-06, |
|
"loss": 0.2944, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 3.08641975308642e-06, |
|
"loss": 0.2391, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 3.0092592592592597e-06, |
|
"loss": 0.0554, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"eval_accuracy": 0.7073170731707317, |
|
"eval_loss": 1.0043153762817383, |
|
"eval_runtime": 7.5796, |
|
"eval_samples_per_second": 5.409, |
|
"eval_steps_per_second": 1.451, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 2.9320987654320994e-06, |
|
"loss": 0.2887, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 2.8549382716049382e-06, |
|
"loss": 0.4346, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 0.3038, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 2.700617283950617e-06, |
|
"loss": 0.1073, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 2.623456790123457e-06, |
|
"loss": 0.2334, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 2.5462962962962966e-06, |
|
"loss": 0.34, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 2.469135802469136e-06, |
|
"loss": 0.0705, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 2.3919753086419755e-06, |
|
"loss": 0.2804, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"eval_accuracy": 0.7073170731707317, |
|
"eval_loss": 1.0246809720993042, |
|
"eval_runtime": 7.4342, |
|
"eval_samples_per_second": 5.515, |
|
"eval_steps_per_second": 1.48, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 2.314814814814815e-06, |
|
"loss": 0.0367, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 2.2376543209876545e-06, |
|
"loss": 0.2872, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 2.1604938271604937e-06, |
|
"loss": 0.0806, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 0.1376, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 2.006172839506173e-06, |
|
"loss": 0.466, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 1.9290123456790124e-06, |
|
"loss": 0.2337, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.3175, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 1.7746913580246916e-06, |
|
"loss": 0.1424, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"eval_accuracy": 0.7804878048780488, |
|
"eval_loss": 0.8541743755340576, |
|
"eval_runtime": 7.7108, |
|
"eval_samples_per_second": 5.317, |
|
"eval_steps_per_second": 1.427, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 1.697530864197531e-06, |
|
"loss": 0.1911, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 1.6203703703703705e-06, |
|
"loss": 0.0041, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 1.54320987654321e-06, |
|
"loss": 0.3306, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 1.4660493827160497e-06, |
|
"loss": 0.1108, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 1.3888888888888892e-06, |
|
"loss": 0.1808, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 1.3117283950617284e-06, |
|
"loss": 0.1984, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 15.05, |
|
"learning_rate": 1.234567901234568e-06, |
|
"loss": 0.0157, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 15.05, |
|
"learning_rate": 1.1574074074074076e-06, |
|
"loss": 0.4692, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"eval_accuracy": 0.7317073170731707, |
|
"eval_loss": 1.0264424085617065, |
|
"eval_runtime": 7.57, |
|
"eval_samples_per_second": 5.416, |
|
"eval_steps_per_second": 1.453, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"step": 1296, |
|
"total_flos": 2.262256757640895e+19, |
|
"train_loss": 0.4041450611419148, |
|
"train_runtime": 2319.3656, |
|
"train_samples_per_second": 2.483, |
|
"train_steps_per_second": 0.621 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"eval_accuracy": 0.8292682926829268, |
|
"eval_loss": 0.4281896650791168, |
|
"eval_runtime": 7.8902, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 1.394, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"eval_accuracy": 0.8292682926829268, |
|
"eval_loss": 0.4281897246837616, |
|
"eval_runtime": 7.7423, |
|
"eval_samples_per_second": 5.296, |
|
"eval_steps_per_second": 1.421, |
|
"step": 1296 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 2.262256757640895e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|