|
{ |
|
"best_metric": 0.8251666013328106, |
|
"best_model_checkpoint": "videomae-large-cctv-brawl_extended_v1/checkpoint-12565", |
|
"epoch": 4.199681655391962, |
|
"eval_steps": 500, |
|
"global_step": 12565, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.386634844868735e-09, |
|
"loss": 0.7489, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.77326968973747e-09, |
|
"loss": 0.724, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.159904534606205e-09, |
|
"loss": 0.7649, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.54653937947494e-09, |
|
"loss": 0.6728, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.1933174224343675e-08, |
|
"loss": 0.7368, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.431980906921241e-08, |
|
"loss": 0.7369, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.6706443914081144e-08, |
|
"loss": 0.7463, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.909307875894988e-08, |
|
"loss": 0.6903, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.1479713603818616e-08, |
|
"loss": 0.725, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.386634844868735e-08, |
|
"loss": 0.7142, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.6252983293556084e-08, |
|
"loss": 0.7131, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.863961813842482e-08, |
|
"loss": 0.7178, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.102625298329355e-08, |
|
"loss": 0.6786, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.341288782816229e-08, |
|
"loss": 0.7279, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.579952267303102e-08, |
|
"loss": 0.6817, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.818615751789976e-08, |
|
"loss": 0.6895, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.05727923627685e-08, |
|
"loss": 0.6714, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.295942720763723e-08, |
|
"loss": 0.7969, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.5346062052505965e-08, |
|
"loss": 0.6794, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.77326968973747e-08, |
|
"loss": 0.7054, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.0119331742243434e-08, |
|
"loss": 0.7599, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.250596658711217e-08, |
|
"loss": 0.7148, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.48926014319809e-08, |
|
"loss": 0.7105, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.727923627684964e-08, |
|
"loss": 0.7208, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.966587112171838e-08, |
|
"loss": 0.6222, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.20525059665871e-08, |
|
"loss": 0.6742, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.443914081145585e-08, |
|
"loss": 0.661, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.682577565632457e-08, |
|
"loss": 0.722, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.921241050119332e-08, |
|
"loss": 0.7342, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.159904534606204e-08, |
|
"loss": 0.6948, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.398568019093078e-08, |
|
"loss": 0.7301, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.637231503579952e-08, |
|
"loss": 0.6596, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.875894988066825e-08, |
|
"loss": 0.6857, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.1145584725537e-08, |
|
"loss": 0.7129, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.353221957040572e-08, |
|
"loss": 0.6889, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.591885441527446e-08, |
|
"loss": 0.6673, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.830548926014319e-08, |
|
"loss": 0.6823, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.069212410501193e-08, |
|
"loss": 0.6655, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.307875894988066e-08, |
|
"loss": 0.6166, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.54653937947494e-08, |
|
"loss": 0.6371, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.785202863961813e-08, |
|
"loss": 0.704, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.0023866348448687e-07, |
|
"loss": 0.7303, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.0262529832935561e-07, |
|
"loss": 0.709, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0501193317422434e-07, |
|
"loss": 0.6405, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0739856801909308e-07, |
|
"loss": 0.6122, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.097852028639618e-07, |
|
"loss": 0.6458, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.1217183770883055e-07, |
|
"loss": 0.6574, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.1455847255369927e-07, |
|
"loss": 0.6348, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.1694510739856802e-07, |
|
"loss": 0.6998, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.1933174224343676e-07, |
|
"loss": 0.6245, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.2171837708830548e-07, |
|
"loss": 0.6499, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.241050119331742e-07, |
|
"loss": 0.5856, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.2649164677804294e-07, |
|
"loss": 0.7143, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.288782816229117e-07, |
|
"loss": 0.6285, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.3126491646778042e-07, |
|
"loss": 0.6485, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.3365155131264915e-07, |
|
"loss": 0.6322, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.3603818615751788e-07, |
|
"loss": 0.6228, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.3842482100238663e-07, |
|
"loss": 0.6469, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.4081145584725536e-07, |
|
"loss": 0.6374, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.4319809069212409e-07, |
|
"loss": 0.6418, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.4558472553699284e-07, |
|
"loss": 0.6212, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.4797136038186157e-07, |
|
"loss": 0.66, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.503579952267303e-07, |
|
"loss": 0.5956, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.5274463007159905e-07, |
|
"loss": 0.6426, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.5513126491646775e-07, |
|
"loss": 0.6602, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.575178997613365e-07, |
|
"loss": 0.633, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.5990453460620523e-07, |
|
"loss": 0.6016, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.62291169451074e-07, |
|
"loss": 0.6235, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.646778042959427e-07, |
|
"loss": 0.579, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.6706443914081144e-07, |
|
"loss": 0.5734, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.6945107398568017e-07, |
|
"loss": 0.5904, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.7183770883054892e-07, |
|
"loss": 0.5944, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.7422434367541765e-07, |
|
"loss": 0.6764, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.7661097852028638e-07, |
|
"loss": 0.5815, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.789976133651551e-07, |
|
"loss": 0.5883, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8138424821002386e-07, |
|
"loss": 0.6033, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.837708830548926e-07, |
|
"loss": 0.5302, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8615751789976132e-07, |
|
"loss": 0.5989, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8854415274463004e-07, |
|
"loss": 0.5534, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.909307875894988e-07, |
|
"loss": 0.6403, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9331742243436753e-07, |
|
"loss": 0.4963, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9570405727923625e-07, |
|
"loss": 0.653, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.98090692124105e-07, |
|
"loss": 0.606, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.0047732696897374e-07, |
|
"loss": 0.6209, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.028639618138425e-07, |
|
"loss": 0.5385, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.0525059665871122e-07, |
|
"loss": 0.557, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.0763723150357995e-07, |
|
"loss": 0.6703, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.1002386634844867e-07, |
|
"loss": 0.5217, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.1241050119331743e-07, |
|
"loss": 0.5697, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.1479713603818616e-07, |
|
"loss": 0.5365, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.1718377088305488e-07, |
|
"loss": 0.56, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.195704057279236e-07, |
|
"loss": 0.4775, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.2195704057279237e-07, |
|
"loss": 0.5328, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.243436754176611e-07, |
|
"loss": 0.6135, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.2673031026252982e-07, |
|
"loss": 0.5732, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.2911694510739855e-07, |
|
"loss": 0.5636, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.315035799522673e-07, |
|
"loss": 0.6081, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.3389021479713603e-07, |
|
"loss": 0.619, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.3627684964200478e-07, |
|
"loss": 0.5916, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.386634844868735e-07, |
|
"loss": 0.6113, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.4105011933174227e-07, |
|
"loss": 0.6695, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.4343675417661097e-07, |
|
"loss": 0.559, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.458233890214797e-07, |
|
"loss": 0.5641, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.482100238663484e-07, |
|
"loss": 0.5845, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.505966587112172e-07, |
|
"loss": 0.6465, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.529832935560859e-07, |
|
"loss": 0.5213, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.5536992840095463e-07, |
|
"loss": 0.5663, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.577565632458234e-07, |
|
"loss": 0.6348, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.6014319809069214e-07, |
|
"loss": 0.5389, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.6252983293556084e-07, |
|
"loss": 0.6299, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.649164677804296e-07, |
|
"loss": 0.5185, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.673031026252983e-07, |
|
"loss": 0.4967, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.6968973747016705e-07, |
|
"loss": 0.5323, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7207637231503575e-07, |
|
"loss": 0.4797, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.744630071599045e-07, |
|
"loss": 0.591, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7684964200477326e-07, |
|
"loss": 0.488, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.79236276849642e-07, |
|
"loss": 0.4519, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.816229116945107e-07, |
|
"loss": 0.5839, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.8400954653937947e-07, |
|
"loss": 0.5682, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.8639618138424817e-07, |
|
"loss": 0.4765, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.8878281622911693e-07, |
|
"loss": 0.6187, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.911694510739857e-07, |
|
"loss": 0.4894, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.935560859188544e-07, |
|
"loss": 0.5424, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9594272076372314e-07, |
|
"loss": 0.5351, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.983293556085919e-07, |
|
"loss": 0.5059, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.999204103289706e-07, |
|
"loss": 0.5178, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.996551114255394e-07, |
|
"loss": 0.3589, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9938981252210824e-07, |
|
"loss": 0.5448, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9912451361867705e-07, |
|
"loss": 0.4891, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.988592147152458e-07, |
|
"loss": 0.5095, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9859391581181463e-07, |
|
"loss": 0.5336, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9832861690838345e-07, |
|
"loss": 0.5037, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.980633180049522e-07, |
|
"loss": 0.4463, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.97798019101521e-07, |
|
"loss": 0.4545, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9753272019808984e-07, |
|
"loss": 0.5409, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9726742129465865e-07, |
|
"loss": 0.408, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9700212239122747e-07, |
|
"loss": 0.5175, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9673682348779623e-07, |
|
"loss": 0.4775, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9647152458436504e-07, |
|
"loss": 0.6129, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9620622568093386e-07, |
|
"loss": 0.366, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.959409267775026e-07, |
|
"loss": 0.4697, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9567562787407143e-07, |
|
"loss": 0.4706, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9541032897064025e-07, |
|
"loss": 0.4555, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9514503006720906e-07, |
|
"loss": 0.5475, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.948797311637778e-07, |
|
"loss": 0.4358, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9461443226034664e-07, |
|
"loss": 0.5115, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9434913335691545e-07, |
|
"loss": 0.6776, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.940838344534842e-07, |
|
"loss": 0.4034, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.938185355500531e-07, |
|
"loss": 0.5232, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9355323664662185e-07, |
|
"loss": 0.5778, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9328793774319066e-07, |
|
"loss": 0.5217, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.930226388397595e-07, |
|
"loss": 0.5378, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9275733993632824e-07, |
|
"loss": 0.5131, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9249204103289705e-07, |
|
"loss": 0.4615, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9222674212946587e-07, |
|
"loss": 0.5084, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9196144322603463e-07, |
|
"loss": 0.5639, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9169614432260344e-07, |
|
"loss": 0.6176, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9143084541917226e-07, |
|
"loss": 0.4819, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9116554651574107e-07, |
|
"loss": 0.6385, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9090024761230983e-07, |
|
"loss": 0.5515, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9063494870887865e-07, |
|
"loss": 0.5406, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9036964980544746e-07, |
|
"loss": 0.5813, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.901043509020162e-07, |
|
"loss": 0.6192, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.898390519985851e-07, |
|
"loss": 0.5759, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8957375309515386e-07, |
|
"loss": 0.4963, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8930845419172267e-07, |
|
"loss": 0.5021, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.890431552882915e-07, |
|
"loss": 0.5574, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8877785638486025e-07, |
|
"loss": 0.4434, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8851255748142906e-07, |
|
"loss": 0.5213, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.882472585779979e-07, |
|
"loss": 0.461, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.879819596745667e-07, |
|
"loss": 0.4587, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8771666077113545e-07, |
|
"loss": 0.3879, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8745136186770427e-07, |
|
"loss": 0.48, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.871860629642731e-07, |
|
"loss": 0.6378, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8692076406084184e-07, |
|
"loss": 0.5548, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8665546515741066e-07, |
|
"loss": 0.4741, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8639016625397947e-07, |
|
"loss": 0.4441, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8612486735054823e-07, |
|
"loss": 0.4345, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.858595684471171e-07, |
|
"loss": 0.6156, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8559426954368586e-07, |
|
"loss": 0.4104, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.853289706402547e-07, |
|
"loss": 0.5361, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.850636717368235e-07, |
|
"loss": 0.4971, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8479837283339226e-07, |
|
"loss": 0.4066, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8453307392996107e-07, |
|
"loss": 0.4786, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.842677750265299e-07, |
|
"loss": 0.4475, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.840024761230987e-07, |
|
"loss": 0.596, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8373717721966746e-07, |
|
"loss": 0.4797, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.834718783162363e-07, |
|
"loss": 0.6381, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.832065794128051e-07, |
|
"loss": 0.5512, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8294128050937385e-07, |
|
"loss": 0.526, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8267598160594267e-07, |
|
"loss": 0.4208, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.824106827025115e-07, |
|
"loss": 0.5019, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.821453837990803e-07, |
|
"loss": 0.5601, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.818800848956491e-07, |
|
"loss": 0.4532, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.816147859922179e-07, |
|
"loss": 0.6079, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.813494870887867e-07, |
|
"loss": 0.4444, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.810841881853555e-07, |
|
"loss": 0.5132, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8081888928192426e-07, |
|
"loss": 0.5627, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.805535903784931e-07, |
|
"loss": 0.4318, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.802882914750619e-07, |
|
"loss": 0.5026, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.800229925716307e-07, |
|
"loss": 0.5187, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.7975769366819947e-07, |
|
"loss": 0.396, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.794923947647683e-07, |
|
"loss": 0.4171, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.792270958613371e-07, |
|
"loss": 0.4792, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.7896179695790586e-07, |
|
"loss": 0.4526, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.7869649805447473e-07, |
|
"loss": 0.4952, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.784311991510435e-07, |
|
"loss": 0.5462, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.781659002476123e-07, |
|
"loss": 0.6351, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.779006013441811e-07, |
|
"loss": 0.5329, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.776353024407499e-07, |
|
"loss": 0.3761, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.773700035373187e-07, |
|
"loss": 0.4625, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.771047046338875e-07, |
|
"loss": 0.5105, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.768394057304563e-07, |
|
"loss": 0.4063, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.765741068270251e-07, |
|
"loss": 0.4644, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.763088079235939e-07, |
|
"loss": 0.4825, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.760435090201627e-07, |
|
"loss": 0.5125, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.757782101167315e-07, |
|
"loss": 0.5761, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.755129112133003e-07, |
|
"loss": 0.4859, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.752476123098691e-07, |
|
"loss": 0.4388, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7498231340643787e-07, |
|
"loss": 0.5507, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7471701450300674e-07, |
|
"loss": 0.4343, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.744517155995755e-07, |
|
"loss": 0.4135, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.741864166961443e-07, |
|
"loss": 0.4108, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7392111779271313e-07, |
|
"loss": 0.4274, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.736558188892819e-07, |
|
"loss": 0.4261, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.733905199858507e-07, |
|
"loss": 0.4734, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.731252210824195e-07, |
|
"loss": 0.5553, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7285992217898834e-07, |
|
"loss": 0.4643, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.725946232755571e-07, |
|
"loss": 0.458, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.723293243721259e-07, |
|
"loss": 0.5908, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7206402546869473e-07, |
|
"loss": 0.533, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.717987265652635e-07, |
|
"loss": 0.6152, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.715334276618323e-07, |
|
"loss": 0.3958, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.712681287584011e-07, |
|
"loss": 0.4243, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.710028298549699e-07, |
|
"loss": 0.4743, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7073753095153875e-07, |
|
"loss": 0.603, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.704722320481075e-07, |
|
"loss": 0.4256, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.702069331446763e-07, |
|
"loss": 0.5295, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.6994163424124514e-07, |
|
"loss": 0.4739, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.696763353378139e-07, |
|
"loss": 0.3976, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.694110364343827e-07, |
|
"loss": 0.5386, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.6914573753095153e-07, |
|
"loss": 0.5893, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.6888043862752035e-07, |
|
"loss": 0.4234, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.686151397240891e-07, |
|
"loss": 0.411, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.683498408206579e-07, |
|
"loss": 0.5547, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.6808454191722674e-07, |
|
"loss": 0.4212, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.678192430137955e-07, |
|
"loss": 0.3612, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.675539441103643e-07, |
|
"loss": 0.7204, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.6728864520693313e-07, |
|
"loss": 0.5413, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.6702334630350194e-07, |
|
"loss": 0.4005, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.6675804740007076e-07, |
|
"loss": 0.4193, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.6985495883967072, |
|
"eval_loss": 0.48386532068252563, |
|
"eval_runtime": 707.7642, |
|
"eval_samples_per_second": 3.604, |
|
"eval_steps_per_second": 0.901, |
|
"step": 2514 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.664927484966395e-07, |
|
"loss": 0.4189, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.6622744959320833e-07, |
|
"loss": 0.5329, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.6596215068977715e-07, |
|
"loss": 0.528, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.656968517863459e-07, |
|
"loss": 0.4498, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.654315528829147e-07, |
|
"loss": 0.4362, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.6516625397948354e-07, |
|
"loss": 0.4513, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6490095507605236e-07, |
|
"loss": 0.45, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6463565617262117e-07, |
|
"loss": 0.5109, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6437035726918993e-07, |
|
"loss": 0.454, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6410505836575875e-07, |
|
"loss": 0.5103, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6383975946232756e-07, |
|
"loss": 0.521, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.635744605588964e-07, |
|
"loss": 0.5211, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6330916165546514e-07, |
|
"loss": 0.485, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6304386275203395e-07, |
|
"loss": 0.3995, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6277856384860277e-07, |
|
"loss": 0.6925, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6251326494517153e-07, |
|
"loss": 0.3811, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6224796604174034e-07, |
|
"loss": 0.4942, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6198266713830916e-07, |
|
"loss": 0.4295, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.617173682348779e-07, |
|
"loss": 0.6002, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.614520693314468e-07, |
|
"loss": 0.3757, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.6118677042801555e-07, |
|
"loss": 0.502, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.6092147152458436e-07, |
|
"loss": 0.2979, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.606561726211532e-07, |
|
"loss": 0.3475, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.6039087371772194e-07, |
|
"loss": 0.5197, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.6012557481429076e-07, |
|
"loss": 0.379, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.5986027591085957e-07, |
|
"loss": 0.4161, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.595949770074284e-07, |
|
"loss": 0.561, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.5932967810399715e-07, |
|
"loss": 0.4509, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.5906437920056596e-07, |
|
"loss": 0.3427, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.587990802971348e-07, |
|
"loss": 0.315, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.5853378139370354e-07, |
|
"loss": 0.5377, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.5826848249027235e-07, |
|
"loss": 0.4747, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.5800318358684117e-07, |
|
"loss": 0.604, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.5773788468341e-07, |
|
"loss": 0.6197, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.574725857799788e-07, |
|
"loss": 0.6246, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.5720728687654756e-07, |
|
"loss": 0.385, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.569419879731164e-07, |
|
"loss": 0.3885, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.566766890696852e-07, |
|
"loss": 0.3586, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.5641139016625395e-07, |
|
"loss": 0.4253, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.5614609126282276e-07, |
|
"loss": 0.3906, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.558807923593916e-07, |
|
"loss": 0.5617, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.556154934559604e-07, |
|
"loss": 0.5688, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.5535019455252916e-07, |
|
"loss": 0.5803, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.5508489564909797e-07, |
|
"loss": 0.4535, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.548195967456668e-07, |
|
"loss": 0.4301, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.5455429784223555e-07, |
|
"loss": 0.4844, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.542889989388044e-07, |
|
"loss": 0.4359, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.540237000353732e-07, |
|
"loss": 0.4562, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.53758401131942e-07, |
|
"loss": 0.3864, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.534931022285108e-07, |
|
"loss": 0.5046, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.5322780332507957e-07, |
|
"loss": 0.2887, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.529625044216484e-07, |
|
"loss": 0.4083, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.526972055182172e-07, |
|
"loss": 0.3877, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.5243190661478596e-07, |
|
"loss": 0.5078, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.521666077113548e-07, |
|
"loss": 0.4621, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.519013088079236e-07, |
|
"loss": 0.4411, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.516360099044924e-07, |
|
"loss": 0.5897, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.5137071100106117e-07, |
|
"loss": 0.4126, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.5110541209763e-07, |
|
"loss": 0.4725, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.508401131941988e-07, |
|
"loss": 0.4072, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.5057481429076756e-07, |
|
"loss": 0.5103, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.503095153873364e-07, |
|
"loss": 0.4725, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.500442164839052e-07, |
|
"loss": 0.4113, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.49778917580474e-07, |
|
"loss": 0.3694, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.495136186770428e-07, |
|
"loss": 0.4339, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.492483197736116e-07, |
|
"loss": 0.4924, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.489830208701804e-07, |
|
"loss": 0.4624, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.487177219667492e-07, |
|
"loss": 0.406, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.4845242306331797e-07, |
|
"loss": 0.4311, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.481871241598868e-07, |
|
"loss": 0.3505, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.479218252564556e-07, |
|
"loss": 0.5599, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.476565263530244e-07, |
|
"loss": 0.4658, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.473912274495932e-07, |
|
"loss": 0.3503, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.47125928546162e-07, |
|
"loss": 0.4346, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.468606296427308e-07, |
|
"loss": 0.4888, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.4659533073929957e-07, |
|
"loss": 0.4938, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.4633003183586843e-07, |
|
"loss": 0.3205, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.460647329324372e-07, |
|
"loss": 0.3941, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.45799434029006e-07, |
|
"loss": 0.447, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.455341351255748e-07, |
|
"loss": 0.2545, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.452688362221436e-07, |
|
"loss": 0.4893, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.450035373187124e-07, |
|
"loss": 0.4399, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.447382384152812e-07, |
|
"loss": 0.4247, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.4447293951185003e-07, |
|
"loss": 0.5176, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.442076406084188e-07, |
|
"loss": 0.5129, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.439423417049876e-07, |
|
"loss": 0.3134, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.436770428015564e-07, |
|
"loss": 0.4207, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.434117438981252e-07, |
|
"loss": 0.6662, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.43146444994694e-07, |
|
"loss": 0.4619, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.428811460912628e-07, |
|
"loss": 0.5505, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.426158471878316e-07, |
|
"loss": 0.5201, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.4235054828440044e-07, |
|
"loss": 0.3945, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.420852493809692e-07, |
|
"loss": 0.4216, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.41819950477538e-07, |
|
"loss": 0.4044, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.4155465157410683e-07, |
|
"loss": 0.4147, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.412893526706756e-07, |
|
"loss": 0.4367, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.410240537672444e-07, |
|
"loss": 0.4442, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.407587548638132e-07, |
|
"loss": 0.3516, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.4049345596038204e-07, |
|
"loss": 0.4012, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.402281570569508e-07, |
|
"loss": 0.3966, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.399628581535196e-07, |
|
"loss": 0.4944, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.3969755925008843e-07, |
|
"loss": 0.5081, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.394322603466572e-07, |
|
"loss": 0.5445, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.39166961443226e-07, |
|
"loss": 0.3361, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.389016625397948e-07, |
|
"loss": 0.3811, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.3863636363636364e-07, |
|
"loss": 0.4163, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.3837106473293243e-07, |
|
"loss": 0.3715, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.3810576582950121e-07, |
|
"loss": 0.4421, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.3784046692607003e-07, |
|
"loss": 0.5121, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.3757516802263884e-07, |
|
"loss": 0.4881, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.3730986911920763e-07, |
|
"loss": 0.4282, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.3704457021577642e-07, |
|
"loss": 0.3844, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.3677927131234523e-07, |
|
"loss": 0.3984, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.3651397240891402e-07, |
|
"loss": 0.3395, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.362486735054828e-07, |
|
"loss": 0.3538, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.3598337460205165e-07, |
|
"loss": 0.469, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.3571807569862044e-07, |
|
"loss": 0.4087, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.3545277679518923e-07, |
|
"loss": 0.5183, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.3518747789175804e-07, |
|
"loss": 0.4856, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3492217898832683e-07, |
|
"loss": 0.3906, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3465688008489562e-07, |
|
"loss": 0.4084, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3439158118146444e-07, |
|
"loss": 0.4071, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3412628227803325e-07, |
|
"loss": 0.4447, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3386098337460204e-07, |
|
"loss": 0.4718, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3359568447117085e-07, |
|
"loss": 0.4005, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3333038556773964e-07, |
|
"loss": 0.4809, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3306508666430843e-07, |
|
"loss": 0.3627, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3279978776087724e-07, |
|
"loss": 0.4105, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3253448885744603e-07, |
|
"loss": 0.4147, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3226918995401482e-07, |
|
"loss": 0.5299, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3200389105058366e-07, |
|
"loss": 0.4322, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3173859214715245e-07, |
|
"loss": 0.3699, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.3147329324372124e-07, |
|
"loss": 0.3699, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.3120799434029005e-07, |
|
"loss": 0.2841, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.3094269543685884e-07, |
|
"loss": 0.5602, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.3067739653342763e-07, |
|
"loss": 0.3621, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.3041209762999647e-07, |
|
"loss": 0.3471, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.3014679872656526e-07, |
|
"loss": 0.3144, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.2988149982313405e-07, |
|
"loss": 0.3478, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.2961620091970286e-07, |
|
"loss": 0.3689, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.2935090201627165e-07, |
|
"loss": 0.4915, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.2908560311284044e-07, |
|
"loss": 0.5927, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.2882030420940925e-07, |
|
"loss": 0.3461, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.2855500530597804e-07, |
|
"loss": 0.4687, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2828970640254686e-07, |
|
"loss": 0.3873, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2802440749911567e-07, |
|
"loss": 0.4167, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2775910859568446e-07, |
|
"loss": 0.369, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2749380969225325e-07, |
|
"loss": 0.3097, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2722851078882206e-07, |
|
"loss": 0.4325, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2696321188539085e-07, |
|
"loss": 0.3702, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2669791298195964e-07, |
|
"loss": 0.5312, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2643261407852848e-07, |
|
"loss": 0.2592, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2616731517509727e-07, |
|
"loss": 0.5263, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2590201627166606e-07, |
|
"loss": 0.3491, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2563671736823487e-07, |
|
"loss": 0.4487, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2537141846480366e-07, |
|
"loss": 0.4003, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2510611956137245e-07, |
|
"loss": 0.554, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.248408206579413e-07, |
|
"loss": 0.3426, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.2457552175451008e-07, |
|
"loss": 0.3495, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.2431022285107887e-07, |
|
"loss": 0.3407, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.2404492394764768e-07, |
|
"loss": 0.4444, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.2377962504421647e-07, |
|
"loss": 0.3747, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.2351432614078526e-07, |
|
"loss": 0.4015, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.2324902723735407e-07, |
|
"loss": 0.34, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.2298372833392286e-07, |
|
"loss": 0.4813, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.2271842943049165e-07, |
|
"loss": 0.4426, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.224531305270605e-07, |
|
"loss": 0.314, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.2218783162362928e-07, |
|
"loss": 0.4372, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.2192253272019807e-07, |
|
"loss": 0.5747, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.2165723381676688e-07, |
|
"loss": 0.3215, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.2139193491333567e-07, |
|
"loss": 0.4074, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.2112663600990446e-07, |
|
"loss": 0.3729, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.208613371064733e-07, |
|
"loss": 0.4418, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.205960382030421e-07, |
|
"loss": 0.4403, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.203307392996109e-07, |
|
"loss": 0.4159, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.200654403961797e-07, |
|
"loss": 0.4298, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.1980014149274848e-07, |
|
"loss": 0.4128, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.195348425893173e-07, |
|
"loss": 0.3948, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.1926954368588608e-07, |
|
"loss": 0.3271, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.190042447824549e-07, |
|
"loss": 0.4198, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.187389458790237e-07, |
|
"loss": 0.4266, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.184736469755925e-07, |
|
"loss": 0.4236, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.182083480721613e-07, |
|
"loss": 0.4734, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.179430491687301e-07, |
|
"loss": 0.4639, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.176777502652989e-07, |
|
"loss": 0.3681, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.1741245136186768e-07, |
|
"loss": 0.2595, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.1714715245843652e-07, |
|
"loss": 0.4727, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.168818535550053e-07, |
|
"loss": 0.3284, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.166165546515741e-07, |
|
"loss": 0.3289, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.163512557481429e-07, |
|
"loss": 0.4443, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.160859568447117e-07, |
|
"loss": 0.2787, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.158206579412805e-07, |
|
"loss": 0.6671, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.1555535903784933e-07, |
|
"loss": 0.3741, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.1529006013441812e-07, |
|
"loss": 0.3775, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.150247612309869e-07, |
|
"loss": 0.3939, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.1475946232755572e-07, |
|
"loss": 0.2898, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.144941634241245e-07, |
|
"loss": 0.535, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.142288645206933e-07, |
|
"loss": 0.3356, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.139635656172621e-07, |
|
"loss": 0.4034, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.136982667138309e-07, |
|
"loss": 0.3448, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.134329678103997e-07, |
|
"loss": 0.4358, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.1316766890696853e-07, |
|
"loss": 0.2735, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.1290237000353732e-07, |
|
"loss": 0.4325, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.126370711001061e-07, |
|
"loss": 0.3883, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.1237177219667492e-07, |
|
"loss": 0.3338, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.121064732932437e-07, |
|
"loss": 0.3989, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.118411743898125e-07, |
|
"loss": 0.3722, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.1157587548638134e-07, |
|
"loss": 0.3492, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.1131057658295013e-07, |
|
"loss": 0.3775, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.1104527767951891e-07, |
|
"loss": 0.3562, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.1077997877608773e-07, |
|
"loss": 0.6652, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.1051467987265652e-07, |
|
"loss": 0.3927, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.102493809692253e-07, |
|
"loss": 0.3628, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.0998408206579412e-07, |
|
"loss": 0.5185, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.0971878316236294e-07, |
|
"loss": 0.3643, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.0945348425893172e-07, |
|
"loss": 0.4946, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.0918818535550054e-07, |
|
"loss": 0.4316, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.0892288645206933e-07, |
|
"loss": 0.3947, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.0865758754863811e-07, |
|
"loss": 0.3337, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.0839228864520693e-07, |
|
"loss": 0.3022, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0812698974177572e-07, |
|
"loss": 0.3628, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.078616908383445e-07, |
|
"loss": 0.3703, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0759639193491335e-07, |
|
"loss": 0.275, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0733109303148214e-07, |
|
"loss": 0.3861, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0706579412805092e-07, |
|
"loss": 0.416, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0680049522461974e-07, |
|
"loss": 0.3163, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0653519632118853e-07, |
|
"loss": 0.371, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0626989741775732e-07, |
|
"loss": 0.2409, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0600459851432616e-07, |
|
"loss": 0.5185, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0573929961089494e-07, |
|
"loss": 0.3575, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0547400070746373e-07, |
|
"loss": 0.5805, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0520870180403255e-07, |
|
"loss": 0.3353, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0494340290060134e-07, |
|
"loss": 0.3381, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0467810399717012e-07, |
|
"loss": 0.3048, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0441280509373894e-07, |
|
"loss": 0.3347, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0414750619030773e-07, |
|
"loss": 0.3736, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0388220728687654e-07, |
|
"loss": 0.3378, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0361690838344536e-07, |
|
"loss": 0.3411, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0335160948001414e-07, |
|
"loss": 0.2934, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0308631057658293e-07, |
|
"loss": 0.4118, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0282101167315175e-07, |
|
"loss": 0.3951, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0255571276972054e-07, |
|
"loss": 0.322, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0229041386628932e-07, |
|
"loss": 0.3692, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0202511496285817e-07, |
|
"loss": 0.3782, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0175981605942695e-07, |
|
"loss": 0.4777, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.0149451715599574e-07, |
|
"loss": 0.421, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.0122921825256456e-07, |
|
"loss": 0.3586, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.0096391934913335e-07, |
|
"loss": 0.3617, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.0069862044570213e-07, |
|
"loss": 0.2693, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.0043332154227095e-07, |
|
"loss": 0.2447, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.0016802263883976e-07, |
|
"loss": 0.3542, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.7593100744805958, |
|
"eval_loss": 0.44453132152557373, |
|
"eval_runtime": 710.6654, |
|
"eval_samples_per_second": 3.59, |
|
"eval_steps_per_second": 0.898, |
|
"step": 5028 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.9990272373540855e-07, |
|
"loss": 0.3873, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.9963742483197737e-07, |
|
"loss": 0.3811, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.9937212592854615e-07, |
|
"loss": 0.2772, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.9910682702511494e-07, |
|
"loss": 0.4806, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.9884152812168376e-07, |
|
"loss": 0.3124, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.9857622921825255e-07, |
|
"loss": 0.247, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.9831093031482133e-07, |
|
"loss": 0.3793, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9804563141139017e-07, |
|
"loss": 0.5119, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9778033250795896e-07, |
|
"loss": 0.2919, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9751503360452775e-07, |
|
"loss": 0.3725, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9724973470109657e-07, |
|
"loss": 0.4659, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9698443579766535e-07, |
|
"loss": 0.3603, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9671913689423414e-07, |
|
"loss": 0.3629, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9645383799080298e-07, |
|
"loss": 0.4292, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9618853908737177e-07, |
|
"loss": 0.3256, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9592324018394056e-07, |
|
"loss": 0.2833, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9565794128050938e-07, |
|
"loss": 0.47, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9539264237707816e-07, |
|
"loss": 0.2898, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9512734347364695e-07, |
|
"loss": 0.2773, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9486204457021577e-07, |
|
"loss": 0.4663, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9459674566678455e-07, |
|
"loss": 0.4463, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9433144676335337e-07, |
|
"loss": 0.306, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9406614785992218e-07, |
|
"loss": 0.5645, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9380084895649097e-07, |
|
"loss": 0.3439, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9353555005305976e-07, |
|
"loss": 0.311, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9327025114962858e-07, |
|
"loss": 0.4669, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9300495224619736e-07, |
|
"loss": 0.2812, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9273965334276615e-07, |
|
"loss": 0.3005, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.92474354439335e-07, |
|
"loss": 0.3576, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9220905553590378e-07, |
|
"loss": 0.2867, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9194375663247257e-07, |
|
"loss": 0.2798, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9167845772904138e-07, |
|
"loss": 0.3237, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.9141315882561017e-07, |
|
"loss": 0.3129, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.9114785992217896e-07, |
|
"loss": 0.3229, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.908825610187478e-07, |
|
"loss": 0.4269, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.906172621153166e-07, |
|
"loss": 0.4231, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.9035196321188538e-07, |
|
"loss": 0.3866, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.900866643084542e-07, |
|
"loss": 0.2466, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.8982136540502298e-07, |
|
"loss": 0.4208, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.8955606650159177e-07, |
|
"loss": 0.3165, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.8929076759816058e-07, |
|
"loss": 0.3127, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.8902546869472937e-07, |
|
"loss": 0.3945, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.8876016979129816e-07, |
|
"loss": 0.381, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.88494870887867e-07, |
|
"loss": 0.4172, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.882295719844358e-07, |
|
"loss": 0.461, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.8796427308100458e-07, |
|
"loss": 0.46, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.876989741775734e-07, |
|
"loss": 0.2381, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.8743367527414218e-07, |
|
"loss": 0.4672, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.8716837637071097e-07, |
|
"loss": 0.2909, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.869030774672798e-07, |
|
"loss": 0.4985, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.866377785638486e-07, |
|
"loss": 0.3446, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.863724796604174e-07, |
|
"loss": 0.3878, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.861071807569862e-07, |
|
"loss": 0.2926, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.85841881853555e-07, |
|
"loss": 0.2187, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.8557658295012378e-07, |
|
"loss": 0.338, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.853112840466926e-07, |
|
"loss": 0.2448, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.850459851432614e-07, |
|
"loss": 0.2129, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.847806862398302e-07, |
|
"loss": 0.3554, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.84515387336399e-07, |
|
"loss": 0.444, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.842500884329678e-07, |
|
"loss": 0.306, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.839847895295366e-07, |
|
"loss": 0.4596, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.837194906261054e-07, |
|
"loss": 0.4518, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.834541917226742e-07, |
|
"loss": 0.2654, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.8318889281924298e-07, |
|
"loss": 0.2054, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.8292359391581182e-07, |
|
"loss": 0.2483, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.826582950123806e-07, |
|
"loss": 0.3451, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.823929961089494e-07, |
|
"loss": 0.3117, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.821276972055182e-07, |
|
"loss": 0.2829, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.81862398302087e-07, |
|
"loss": 0.3713, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.815970993986558e-07, |
|
"loss": 0.2603, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.8133180049522463e-07, |
|
"loss": 0.2378, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.8106650159179342e-07, |
|
"loss": 0.4374, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.808012026883622e-07, |
|
"loss": 0.4291, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.8053590378493102e-07, |
|
"loss": 0.4139, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.802706048814998e-07, |
|
"loss": 0.385, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.800053059780686e-07, |
|
"loss": 0.3487, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.797400070746374e-07, |
|
"loss": 0.316, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.794747081712062e-07, |
|
"loss": 0.3861, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7920940926777502e-07, |
|
"loss": 0.3524, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7894411036434383e-07, |
|
"loss": 0.2247, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7867881146091262e-07, |
|
"loss": 0.3113, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.784135125574814e-07, |
|
"loss": 0.5685, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7814821365405022e-07, |
|
"loss": 0.4044, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.77882914750619e-07, |
|
"loss": 0.3159, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.776176158471878e-07, |
|
"loss": 0.372, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7735231694375664e-07, |
|
"loss": 0.2586, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7708701804032543e-07, |
|
"loss": 0.3496, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7682171913689422e-07, |
|
"loss": 0.3252, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7655642023346303e-07, |
|
"loss": 0.2223, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7629112133003182e-07, |
|
"loss": 0.3757, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7602582242660063e-07, |
|
"loss": 0.3672, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7576052352316945e-07, |
|
"loss": 0.2711, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7549522461973824e-07, |
|
"loss": 0.3355, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7522992571630705e-07, |
|
"loss": 0.3543, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7496462681287584e-07, |
|
"loss": 0.3793, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7469932790944463e-07, |
|
"loss": 0.5126, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7443402900601344e-07, |
|
"loss": 0.2448, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7416873010258223e-07, |
|
"loss": 0.2939, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7390343119915102e-07, |
|
"loss": 0.246, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7363813229571986e-07, |
|
"loss": 0.4639, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7337283339228865e-07, |
|
"loss": 0.3554, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7310753448885744e-07, |
|
"loss": 0.5248, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7284223558542625e-07, |
|
"loss": 0.2787, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7257693668199504e-07, |
|
"loss": 0.3388, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7231163777856383e-07, |
|
"loss": 0.306, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7204633887513267e-07, |
|
"loss": 0.3312, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7178103997170146e-07, |
|
"loss": 0.3058, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.7151574106827025e-07, |
|
"loss": 0.6773, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.7125044216483906e-07, |
|
"loss": 0.2879, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.7098514326140785e-07, |
|
"loss": 0.3534, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.7071984435797664e-07, |
|
"loss": 0.3959, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.7045454545454545e-07, |
|
"loss": 0.4433, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.7018924655111424e-07, |
|
"loss": 0.3306, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6992394764768303e-07, |
|
"loss": 0.2876, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6965864874425187e-07, |
|
"loss": 0.2279, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6939334984082066e-07, |
|
"loss": 0.47, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6912805093738945e-07, |
|
"loss": 0.5794, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6886275203395826e-07, |
|
"loss": 0.3706, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6859745313052705e-07, |
|
"loss": 0.3674, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6833215422709584e-07, |
|
"loss": 0.4053, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6806685532366468e-07, |
|
"loss": 0.3724, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6780155642023347e-07, |
|
"loss": 0.4831, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6753625751680225e-07, |
|
"loss": 0.1894, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6727095861337107e-07, |
|
"loss": 0.4633, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6700565970993986e-07, |
|
"loss": 0.2882, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6674036080650865e-07, |
|
"loss": 0.4873, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6647506190307746e-07, |
|
"loss": 0.4315, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6620976299964628e-07, |
|
"loss": 0.4249, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6594446409621506e-07, |
|
"loss": 0.3353, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6567916519278388e-07, |
|
"loss": 0.7332, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6541386628935267e-07, |
|
"loss": 0.1961, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6514856738592146e-07, |
|
"loss": 0.3474, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6488326848249027e-07, |
|
"loss": 0.2356, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6461796957905906e-07, |
|
"loss": 0.3085, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6435267067562785e-07, |
|
"loss": 0.3192, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.640873717721967e-07, |
|
"loss": 0.2222, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6382207286876548e-07, |
|
"loss": 0.4065, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6355677396533426e-07, |
|
"loss": 0.4045, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6329147506190308e-07, |
|
"loss": 0.3183, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6302617615847187e-07, |
|
"loss": 0.1826, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6276087725504066e-07, |
|
"loss": 0.3059, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.624955783516095e-07, |
|
"loss": 0.2374, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6223027944817828e-07, |
|
"loss": 0.3193, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6196498054474707e-07, |
|
"loss": 0.2761, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.616996816413159e-07, |
|
"loss": 0.3633, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6143438273788468e-07, |
|
"loss": 0.3956, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6116908383445346e-07, |
|
"loss": 0.2156, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6090378493102228e-07, |
|
"loss": 0.3898, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6063848602759107e-07, |
|
"loss": 0.2793, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6037318712415988e-07, |
|
"loss": 0.3232, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.601078882207287e-07, |
|
"loss": 0.2486, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.5984258931729749e-07, |
|
"loss": 0.447, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.5957729041386627e-07, |
|
"loss": 0.368, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.593119915104351e-07, |
|
"loss": 0.3826, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.5904669260700388e-07, |
|
"loss": 0.2456, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.5878139370357266e-07, |
|
"loss": 0.3172, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.585160948001415e-07, |
|
"loss": 0.2154, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.582507958967103e-07, |
|
"loss": 0.3329, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.5798549699327908e-07, |
|
"loss": 0.266, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.577201980898479e-07, |
|
"loss": 0.3691, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.5745489918641669e-07, |
|
"loss": 0.2765, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.5718960028298547e-07, |
|
"loss": 0.2228, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.5692430137955432e-07, |
|
"loss": 0.1927, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.566590024761231e-07, |
|
"loss": 0.4745, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.563937035726919e-07, |
|
"loss": 0.1534, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.561284046692607e-07, |
|
"loss": 0.1242, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.558631057658295e-07, |
|
"loss": 0.4647, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.5559780686239828e-07, |
|
"loss": 0.5586, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.553325079589671e-07, |
|
"loss": 0.4222, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.5506720905553589e-07, |
|
"loss": 0.206, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5480191015210467e-07, |
|
"loss": 0.3112, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5453661124867352e-07, |
|
"loss": 0.4122, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.542713123452423e-07, |
|
"loss": 0.3272, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.540060134418111e-07, |
|
"loss": 0.3635, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.537407145383799e-07, |
|
"loss": 0.5525, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.534754156349487e-07, |
|
"loss": 0.2226, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5321011673151748e-07, |
|
"loss": 0.1621, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5294481782808632e-07, |
|
"loss": 0.5783, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.526795189246551e-07, |
|
"loss": 0.3406, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.524142200212239e-07, |
|
"loss": 0.2203, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5214892111779272e-07, |
|
"loss": 0.4043, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.518836222143615e-07, |
|
"loss": 0.3353, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.516183233109303e-07, |
|
"loss": 0.1902, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.513530244074991e-07, |
|
"loss": 0.3804, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5108772550406792e-07, |
|
"loss": 0.3891, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.508224266006367e-07, |
|
"loss": 0.2982, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5055712769720552e-07, |
|
"loss": 0.319, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.502918287937743e-07, |
|
"loss": 0.3148, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.500265298903431e-07, |
|
"loss": 0.3978, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4976123098691192e-07, |
|
"loss": 0.2571, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.494959320834807e-07, |
|
"loss": 0.4223, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4923063318004952e-07, |
|
"loss": 0.4157, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.489653342766183e-07, |
|
"loss": 0.4624, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4870003537318712e-07, |
|
"loss": 0.2294, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.484347364697559e-07, |
|
"loss": 0.2723, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4816943756632472e-07, |
|
"loss": 0.3129, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.479041386628935e-07, |
|
"loss": 0.2511, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4763883975946233e-07, |
|
"loss": 0.4619, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4737354085603112e-07, |
|
"loss": 0.3391, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4710824195259993e-07, |
|
"loss": 0.3051, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4684294304916872e-07, |
|
"loss": 0.2132, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.465776441457375e-07, |
|
"loss": 0.3529, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4631234524230632e-07, |
|
"loss": 0.411, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4604704633887514e-07, |
|
"loss": 0.5373, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4578174743544393e-07, |
|
"loss": 0.2255, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4551644853201271e-07, |
|
"loss": 0.3849, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4525114962858153e-07, |
|
"loss": 0.3697, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4498585072515032e-07, |
|
"loss": 0.3, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4472055182171913e-07, |
|
"loss": 0.2502, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4445525291828795e-07, |
|
"loss": 0.2659, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4418995401485673e-07, |
|
"loss": 0.2379, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4392465511142552e-07, |
|
"loss": 0.3465, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4365935620799434e-07, |
|
"loss": 0.2112, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4339405730456313e-07, |
|
"loss": 0.2263, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4312875840113194e-07, |
|
"loss": 0.2231, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4286345949770073e-07, |
|
"loss": 0.3545, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4259816059426954e-07, |
|
"loss": 0.3163, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4233286169083833e-07, |
|
"loss": 0.4264, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4206756278740715e-07, |
|
"loss": 0.1768, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.4180226388397596e-07, |
|
"loss": 0.3796, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.4153696498054475e-07, |
|
"loss": 0.3193, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.4127166607711354e-07, |
|
"loss": 0.531, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.4100636717368235e-07, |
|
"loss": 0.2121, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.4074106827025114e-07, |
|
"loss": 0.3917, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.4047576936681996e-07, |
|
"loss": 0.0989, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.4021047046338874e-07, |
|
"loss": 0.5345, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3994517155995753e-07, |
|
"loss": 0.1947, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3967987265652635e-07, |
|
"loss": 0.3083, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3941457375309516e-07, |
|
"loss": 0.4358, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3914927484966395e-07, |
|
"loss": 0.1397, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3888397594623274e-07, |
|
"loss": 0.3461, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3861867704280155e-07, |
|
"loss": 0.237, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3835337813937034e-07, |
|
"loss": 0.3058, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3808807923593916e-07, |
|
"loss": 0.2037, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3782278033250797e-07, |
|
"loss": 0.3189, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3755748142907676e-07, |
|
"loss": 0.2856, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3729218252564555e-07, |
|
"loss": 0.3189, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3702688362221436e-07, |
|
"loss": 0.27, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3676158471878315e-07, |
|
"loss": 0.3278, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3649628581535196e-07, |
|
"loss": 0.5432, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3623098691192075e-07, |
|
"loss": 0.3994, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3596568800848954e-07, |
|
"loss": 0.1706, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3570038910505836e-07, |
|
"loss": 0.2554, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3543509020162717e-07, |
|
"loss": 0.3261, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3516979129819596e-07, |
|
"loss": 0.4152, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3490449239476477e-07, |
|
"loss": 0.4484, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3463919349133356e-07, |
|
"loss": 0.5551, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3437389458790235e-07, |
|
"loss": 0.5414, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3410859568447116e-07, |
|
"loss": 0.3728, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3384329678103998e-07, |
|
"loss": 0.576, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3357799787760877e-07, |
|
"loss": 0.5139, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3331269897417756e-07, |
|
"loss": 0.2673, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.7992943943551548, |
|
"eval_loss": 0.4343836009502411, |
|
"eval_runtime": 667.7031, |
|
"eval_samples_per_second": 3.821, |
|
"eval_steps_per_second": 0.956, |
|
"step": 7542 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.3304740007074637e-07, |
|
"loss": 0.3858, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.3278210116731516e-07, |
|
"loss": 0.4057, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.3251680226388397e-07, |
|
"loss": 0.2731, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.322515033604528e-07, |
|
"loss": 0.3348, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.3198620445702158e-07, |
|
"loss": 0.1794, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.3172090555359037e-07, |
|
"loss": 0.2986, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.3145560665015918e-07, |
|
"loss": 0.3266, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.3119030774672797e-07, |
|
"loss": 0.3126, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.3092500884329678e-07, |
|
"loss": 0.3153, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.3065970993986557e-07, |
|
"loss": 0.3477, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.3039441103643436e-07, |
|
"loss": 0.3177, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.3012911213300317e-07, |
|
"loss": 0.2935, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.29863813229572e-07, |
|
"loss": 0.3827, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.2959851432614078e-07, |
|
"loss": 0.3086, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.293332154227096e-07, |
|
"loss": 0.2633, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.2906791651927838e-07, |
|
"loss": 0.2911, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.2880261761584717e-07, |
|
"loss": 0.3615, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.2853731871241598e-07, |
|
"loss": 0.4242, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.282720198089848e-07, |
|
"loss": 0.2024, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.2800672090555359e-07, |
|
"loss": 0.4539, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.2774142200212237e-07, |
|
"loss": 0.2429, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.274761230986912e-07, |
|
"loss": 0.2325, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.2721082419525998e-07, |
|
"loss": 0.4162, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.269455252918288e-07, |
|
"loss": 0.3089, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.2668022638839758e-07, |
|
"loss": 0.4546, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.264149274849664e-07, |
|
"loss": 0.2543, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.2614962858153518e-07, |
|
"loss": 0.377, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.25884329678104e-07, |
|
"loss": 0.3386, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.2561903077467279e-07, |
|
"loss": 0.3492, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.253537318712416e-07, |
|
"loss": 0.275, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.250884329678104e-07, |
|
"loss": 0.3005, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.2482313406437918e-07, |
|
"loss": 0.301, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.24557835160948e-07, |
|
"loss": 0.3116, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.242925362575168e-07, |
|
"loss": 0.6061, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.240272373540856e-07, |
|
"loss": 0.2186, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.2376193845065438e-07, |
|
"loss": 0.2526, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.234966395472232e-07, |
|
"loss": 0.3165, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.2323134064379199e-07, |
|
"loss": 0.1801, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.229660417403608e-07, |
|
"loss": 0.4456, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.2270074283692962e-07, |
|
"loss": 0.1817, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.224354439334984e-07, |
|
"loss": 0.3027, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.221701450300672e-07, |
|
"loss": 0.2641, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.21904846126636e-07, |
|
"loss": 0.1637, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.216395472232048e-07, |
|
"loss": 0.4759, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.213742483197736e-07, |
|
"loss": 0.2518, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.211089494163424e-07, |
|
"loss": 0.3353, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.2084365051291121e-07, |
|
"loss": 0.2325, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.2057835160948e-07, |
|
"loss": 0.2963, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.2031305270604882e-07, |
|
"loss": 0.3176, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.2004775380261763e-07, |
|
"loss": 0.2076, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.1978245489918642e-07, |
|
"loss": 0.3213, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.195171559957552e-07, |
|
"loss": 0.2378, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.1925185709232402e-07, |
|
"loss": 0.1516, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.1898655818889281e-07, |
|
"loss": 0.2366, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.1872125928546161e-07, |
|
"loss": 0.4461, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.1845596038203043e-07, |
|
"loss": 0.402, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1819066147859922e-07, |
|
"loss": 0.477, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1792536257516802e-07, |
|
"loss": 0.331, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1766006367173683e-07, |
|
"loss": 0.1779, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1739476476830562e-07, |
|
"loss": 0.2126, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1712946586487442e-07, |
|
"loss": 0.3229, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1686416696144322e-07, |
|
"loss": 0.3967, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1659886805801201e-07, |
|
"loss": 0.12, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1633356915458083e-07, |
|
"loss": 0.2832, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1606827025114963e-07, |
|
"loss": 0.2127, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1580297134771842e-07, |
|
"loss": 0.2145, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1553767244428723e-07, |
|
"loss": 0.2717, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1527237354085603e-07, |
|
"loss": 0.2418, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.1500707463742482e-07, |
|
"loss": 0.2614, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1474177573399363e-07, |
|
"loss": 0.1934, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1447647683056244e-07, |
|
"loss": 0.4718, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1421117792713122e-07, |
|
"loss": 0.2791, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1394587902370003e-07, |
|
"loss": 0.3744, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1368058012026884e-07, |
|
"loss": 0.4266, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1341528121683763e-07, |
|
"loss": 0.2921, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1314998231340643e-07, |
|
"loss": 0.3229, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1288468340997525e-07, |
|
"loss": 0.3329, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1261938450654403e-07, |
|
"loss": 0.1501, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1235408560311284e-07, |
|
"loss": 0.3098, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1208878669968164e-07, |
|
"loss": 0.3795, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1182348779625044e-07, |
|
"loss": 0.2139, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.1155818889281924e-07, |
|
"loss": 0.2172, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.1129288998938804e-07, |
|
"loss": 0.4199, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.1102759108595683e-07, |
|
"loss": 0.1432, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.1076229218252564e-07, |
|
"loss": 0.2769, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.1049699327909445e-07, |
|
"loss": 0.4188, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.1023169437566323e-07, |
|
"loss": 0.5459, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.0996639547223205e-07, |
|
"loss": 0.5241, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.0970109656880085e-07, |
|
"loss": 0.573, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.0943579766536964e-07, |
|
"loss": 0.2892, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.0917049876193844e-07, |
|
"loss": 0.2946, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.0890519985850725e-07, |
|
"loss": 0.2701, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.0863990095507604e-07, |
|
"loss": 0.3341, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.0837460205164484e-07, |
|
"loss": 0.347, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0810930314821366e-07, |
|
"loss": 0.3488, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0784400424478245e-07, |
|
"loss": 0.2516, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0757870534135125e-07, |
|
"loss": 0.4856, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0731340643792005e-07, |
|
"loss": 0.3262, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0704810753448885e-07, |
|
"loss": 0.4792, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0678280863105765e-07, |
|
"loss": 0.3977, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0651750972762645e-07, |
|
"loss": 0.247, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0625221082419524e-07, |
|
"loss": 0.5944, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0598691192076406e-07, |
|
"loss": 0.4538, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0572161301733286e-07, |
|
"loss": 0.322, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0545631411390165e-07, |
|
"loss": 0.8246, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0519101521047046e-07, |
|
"loss": 0.2921, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.0492571630703926e-07, |
|
"loss": 0.2552, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0466041740360805e-07, |
|
"loss": 0.2207, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0439511850017685e-07, |
|
"loss": 0.136, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0412981959674567e-07, |
|
"loss": 0.5574, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0386452069331446e-07, |
|
"loss": 0.2432, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0359922178988326e-07, |
|
"loss": 0.4907, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0333392288645207e-07, |
|
"loss": 0.2984, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0306862398302086e-07, |
|
"loss": 0.3477, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0280332507958966e-07, |
|
"loss": 0.4005, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0253802617615848e-07, |
|
"loss": 0.4083, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0227272727272727e-07, |
|
"loss": 0.3129, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0200742836929607e-07, |
|
"loss": 0.4127, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.0174212946586487e-07, |
|
"loss": 0.1239, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.0147683056243366e-07, |
|
"loss": 0.1863, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.0121153165900247e-07, |
|
"loss": 0.3504, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.0094623275557127e-07, |
|
"loss": 0.3059, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.0068093385214006e-07, |
|
"loss": 0.1524, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.0041563494870888e-07, |
|
"loss": 0.38, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.0015033604527768e-07, |
|
"loss": 0.3079, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.988503714184647e-08, |
|
"loss": 0.3288, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.961973823841527e-08, |
|
"loss": 0.554, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.935443933498408e-08, |
|
"loss": 0.2589, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.908914043155288e-08, |
|
"loss": 0.2241, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.882384152812167e-08, |
|
"loss": 0.291, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.855854262469049e-08, |
|
"loss": 0.1931, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.829324372125929e-08, |
|
"loss": 0.2891, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.802794481782808e-08, |
|
"loss": 0.301, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.776264591439689e-08, |
|
"loss": 0.1596, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.749734701096569e-08, |
|
"loss": 0.261, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.723204810753448e-08, |
|
"loss": 0.2761, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.696674920410328e-08, |
|
"loss": 0.2916, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.67014503006721e-08, |
|
"loss": 0.4751, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.643615139724089e-08, |
|
"loss": 0.2561, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.617085249380969e-08, |
|
"loss": 0.1122, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.59055535903785e-08, |
|
"loss": 0.1563, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.564025468694729e-08, |
|
"loss": 0.2828, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.537495578351609e-08, |
|
"loss": 0.3116, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.510965688008489e-08, |
|
"loss": 0.1198, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.48443579766537e-08, |
|
"loss": 0.2329, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.45790590732225e-08, |
|
"loss": 0.2435, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.43137601697913e-08, |
|
"loss": 0.2814, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.404846126636009e-08, |
|
"loss": 0.4588, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.37831623629289e-08, |
|
"loss": 0.3259, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.35178634594977e-08, |
|
"loss": 0.2281, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.325256455606649e-08, |
|
"loss": 0.1325, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.29872656526353e-08, |
|
"loss": 0.1756, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.27219667492041e-08, |
|
"loss": 0.4611, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.24566678457729e-08, |
|
"loss": 0.3821, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.21913689423417e-08, |
|
"loss": 0.4035, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.192607003891051e-08, |
|
"loss": 0.2634, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.16607711354793e-08, |
|
"loss": 0.5165, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 9.13954722320481e-08, |
|
"loss": 0.2734, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 9.113017332861692e-08, |
|
"loss": 0.3244, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 9.08648744251857e-08, |
|
"loss": 0.5966, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 9.05995755217545e-08, |
|
"loss": 0.2808, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 9.033427661832331e-08, |
|
"loss": 0.0861, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 9.006897771489211e-08, |
|
"loss": 0.5532, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 8.980367881146091e-08, |
|
"loss": 0.1335, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 8.953837990802971e-08, |
|
"loss": 0.2124, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 8.92730810045985e-08, |
|
"loss": 0.3196, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 8.900778210116731e-08, |
|
"loss": 0.3537, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 8.874248319773612e-08, |
|
"loss": 0.2052, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 8.84771842943049e-08, |
|
"loss": 0.2914, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.821188539087372e-08, |
|
"loss": 0.201, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.794658648744252e-08, |
|
"loss": 0.5735, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.768128758401131e-08, |
|
"loss": 0.3856, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.741598868058011e-08, |
|
"loss": 0.3567, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.715068977714892e-08, |
|
"loss": 0.1276, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.688539087371771e-08, |
|
"loss": 0.2726, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.662009197028651e-08, |
|
"loss": 0.1824, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.635479306685533e-08, |
|
"loss": 0.2436, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.608949416342412e-08, |
|
"loss": 0.1757, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.582419525999292e-08, |
|
"loss": 0.115, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.555889635656172e-08, |
|
"loss": 0.3133, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.529359745313052e-08, |
|
"loss": 0.3231, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.502829854969932e-08, |
|
"loss": 0.3843, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.476299964626813e-08, |
|
"loss": 0.3218, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.449770074283691e-08, |
|
"loss": 0.2972, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.423240183940573e-08, |
|
"loss": 0.1626, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.396710293597453e-08, |
|
"loss": 0.3293, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.370180403254332e-08, |
|
"loss": 0.1546, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.343650512911213e-08, |
|
"loss": 0.2222, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.317120622568093e-08, |
|
"loss": 0.5815, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.290590732224972e-08, |
|
"loss": 0.4438, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.264060841881852e-08, |
|
"loss": 0.1896, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.237530951538734e-08, |
|
"loss": 0.5805, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.211001061195613e-08, |
|
"loss": 0.5619, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.184471170852493e-08, |
|
"loss": 0.256, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 8.157941280509374e-08, |
|
"loss": 0.1082, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 8.131411390166253e-08, |
|
"loss": 0.4024, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 8.104881499823133e-08, |
|
"loss": 0.4838, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 8.078351609480015e-08, |
|
"loss": 0.4411, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 8.051821719136894e-08, |
|
"loss": 0.4502, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 8.025291828793774e-08, |
|
"loss": 0.3072, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.998761938450654e-08, |
|
"loss": 0.0834, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.972232048107533e-08, |
|
"loss": 0.2351, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.945702157764414e-08, |
|
"loss": 0.293, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.919172267421294e-08, |
|
"loss": 0.4146, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.892642377078173e-08, |
|
"loss": 0.2291, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.866112486735055e-08, |
|
"loss": 0.1389, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.839582596391935e-08, |
|
"loss": 0.2594, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.813052706048814e-08, |
|
"loss": 0.5084, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.786522815705695e-08, |
|
"loss": 0.4067, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.759992925362575e-08, |
|
"loss": 0.3945, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.733463035019454e-08, |
|
"loss": 0.3778, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.706933144676334e-08, |
|
"loss": 0.2721, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.680403254333216e-08, |
|
"loss": 0.304, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.653873363990096e-08, |
|
"loss": 0.2496, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.627343473646975e-08, |
|
"loss": 0.3475, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.600813583303856e-08, |
|
"loss": 0.2106, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.574283692960736e-08, |
|
"loss": 0.4637, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.547753802617615e-08, |
|
"loss": 0.2955, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.521223912274495e-08, |
|
"loss": 0.4538, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.494694021931375e-08, |
|
"loss": 0.4784, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.468164131588256e-08, |
|
"loss": 0.2853, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.441634241245136e-08, |
|
"loss": 0.1619, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.415104350902016e-08, |
|
"loss": 0.4363, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.388574460558896e-08, |
|
"loss": 0.3628, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.362044570215776e-08, |
|
"loss": 0.5235, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.335514679872656e-08, |
|
"loss": 0.276, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.308984789529536e-08, |
|
"loss": 0.3532, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.282454899186417e-08, |
|
"loss": 0.3284, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.255925008843297e-08, |
|
"loss": 0.305, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.229395118500176e-08, |
|
"loss": 0.1895, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.202865228157057e-08, |
|
"loss": 0.6018, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.176335337813937e-08, |
|
"loss": 0.277, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.149805447470817e-08, |
|
"loss": 0.1618, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.123275557127698e-08, |
|
"loss": 0.1706, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.096745666784576e-08, |
|
"loss": 0.1546, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.070215776441458e-08, |
|
"loss": 0.2086, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.043685886098337e-08, |
|
"loss": 0.2052, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.017155995755217e-08, |
|
"loss": 0.2529, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 6.990626105412098e-08, |
|
"loss": 0.2568, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 6.964096215068977e-08, |
|
"loss": 0.5543, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 6.937566324725857e-08, |
|
"loss": 0.4673, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 6.911036434382737e-08, |
|
"loss": 0.2104, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 6.884506544039618e-08, |
|
"loss": 0.1235, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 6.857976653696498e-08, |
|
"loss": 0.3148, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 6.831446763353378e-08, |
|
"loss": 0.2482, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 6.804916873010258e-08, |
|
"loss": 0.2479, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 6.778386982667138e-08, |
|
"loss": 0.4603, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 6.751857092324018e-08, |
|
"loss": 0.181, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 6.725327201980898e-08, |
|
"loss": 0.0648, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 6.698797311637779e-08, |
|
"loss": 0.1411, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 6.672267421294659e-08, |
|
"loss": 0.3185, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_accuracy": 0.8200705605644845, |
|
"eval_loss": 0.4328227639198303, |
|
"eval_runtime": 675.1569, |
|
"eval_samples_per_second": 3.778, |
|
"eval_steps_per_second": 0.945, |
|
"step": 10056 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 6.645737530951539e-08, |
|
"loss": 0.3281, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 6.619207640608418e-08, |
|
"loss": 0.1367, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 6.592677750265299e-08, |
|
"loss": 0.1347, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 6.566147859922178e-08, |
|
"loss": 0.2287, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 6.539617969579058e-08, |
|
"loss": 0.079, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 6.51308807923594e-08, |
|
"loss": 0.4218, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.486558188892818e-08, |
|
"loss": 0.1522, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.460028298549699e-08, |
|
"loss": 0.2411, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.433498408206579e-08, |
|
"loss": 0.2314, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.406968517863459e-08, |
|
"loss": 0.4938, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.380438627520339e-08, |
|
"loss": 0.1713, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.353908737177219e-08, |
|
"loss": 0.3614, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.3273788468341e-08, |
|
"loss": 0.2386, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.30084895649098e-08, |
|
"loss": 0.4071, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.27431906614786e-08, |
|
"loss": 0.3515, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.24778917580474e-08, |
|
"loss": 0.3182, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.22125928546162e-08, |
|
"loss": 0.395, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.1947293951185e-08, |
|
"loss": 0.0892, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.16819950477538e-08, |
|
"loss": 0.4639, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 6.141669614432259e-08, |
|
"loss": 0.2954, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 6.11513972408914e-08, |
|
"loss": 0.3138, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 6.088609833746021e-08, |
|
"loss": 0.5433, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 6.062079943402901e-08, |
|
"loss": 0.2787, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 6.035550053059781e-08, |
|
"loss": 0.3296, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 6.00902016271666e-08, |
|
"loss": 0.1484, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 5.982490272373541e-08, |
|
"loss": 0.2677, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 5.955960382030421e-08, |
|
"loss": 0.2695, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 5.9294304916873e-08, |
|
"loss": 0.3003, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 5.902900601344181e-08, |
|
"loss": 0.1844, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 5.876370711001061e-08, |
|
"loss": 0.3414, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 5.849840820657941e-08, |
|
"loss": 0.2815, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 5.8233109303148216e-08, |
|
"loss": 0.2292, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.796781039971701e-08, |
|
"loss": 0.3515, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.770251149628581e-08, |
|
"loss": 0.3803, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.7437212592854614e-08, |
|
"loss": 0.29, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.7171913689423415e-08, |
|
"loss": 0.3232, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.690661478599221e-08, |
|
"loss": 0.093, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.664131588256102e-08, |
|
"loss": 0.2088, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.637601697912982e-08, |
|
"loss": 0.487, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.6110718075698615e-08, |
|
"loss": 0.2051, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.584541917226742e-08, |
|
"loss": 0.5831, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.558012026883622e-08, |
|
"loss": 0.4019, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.531482136540502e-08, |
|
"loss": 0.3914, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.504952246197383e-08, |
|
"loss": 0.1919, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.478422355854262e-08, |
|
"loss": 0.2466, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.451892465511142e-08, |
|
"loss": 0.2837, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.4253625751680225e-08, |
|
"loss": 0.3293, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.3988326848249027e-08, |
|
"loss": 0.2261, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.372302794481782e-08, |
|
"loss": 0.4414, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.345772904138663e-08, |
|
"loss": 0.4747, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.3192430137955425e-08, |
|
"loss": 0.297, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.2927131234524226e-08, |
|
"loss": 0.4797, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.2661832331093034e-08, |
|
"loss": 0.0799, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.239653342766183e-08, |
|
"loss": 0.1772, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.213123452423063e-08, |
|
"loss": 0.204, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.186593562079943e-08, |
|
"loss": 0.2605, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.1600636717368234e-08, |
|
"loss": 0.2314, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 5.133533781393703e-08, |
|
"loss": 0.383, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 5.1070038910505837e-08, |
|
"loss": 0.3802, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 5.080474000707463e-08, |
|
"loss": 0.155, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 5.053944110364343e-08, |
|
"loss": 0.4906, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 5.027414220021224e-08, |
|
"loss": 0.3776, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 5.0008843296781036e-08, |
|
"loss": 0.5581, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.974354439334984e-08, |
|
"loss": 0.3237, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.947824548991864e-08, |
|
"loss": 0.3431, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.921294658648744e-08, |
|
"loss": 0.241, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.894764768305624e-08, |
|
"loss": 0.389, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.8682348779625043e-08, |
|
"loss": 0.4295, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.841704987619384e-08, |
|
"loss": 0.2456, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.8151750972762646e-08, |
|
"loss": 0.5222, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.788645206933145e-08, |
|
"loss": 0.3246, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.762115316590024e-08, |
|
"loss": 0.4138, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.735585426246905e-08, |
|
"loss": 0.2455, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.7090555359037846e-08, |
|
"loss": 0.1867, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.682525645560665e-08, |
|
"loss": 0.3166, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.6559957552175455e-08, |
|
"loss": 0.1877, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.629465864874425e-08, |
|
"loss": 0.4553, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.6029359745313045e-08, |
|
"loss": 0.1521, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.5764060841881853e-08, |
|
"loss": 0.2166, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.5498761938450655e-08, |
|
"loss": 0.1694, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.523346303501945e-08, |
|
"loss": 0.1315, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.496816413158826e-08, |
|
"loss": 0.4086, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.470286522815705e-08, |
|
"loss": 0.3522, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.4437566324725854e-08, |
|
"loss": 0.1301, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.417226742129466e-08, |
|
"loss": 0.1597, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.390696851786346e-08, |
|
"loss": 0.4818, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.364166961443226e-08, |
|
"loss": 0.2074, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.337637071100106e-08, |
|
"loss": 0.0484, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.311107180756986e-08, |
|
"loss": 0.1093, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.2845772904138657e-08, |
|
"loss": 0.3121, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.2580474000707465e-08, |
|
"loss": 0.1872, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.231517509727626e-08, |
|
"loss": 0.4645, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.204987619384506e-08, |
|
"loss": 0.2252, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.178457729041387e-08, |
|
"loss": 0.3651, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.1519278386982664e-08, |
|
"loss": 0.272, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.1253979483551466e-08, |
|
"loss": 0.408, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.098868058012027e-08, |
|
"loss": 0.2049, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.072338167668907e-08, |
|
"loss": 0.1285, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.0458082773257864e-08, |
|
"loss": 0.4743, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.019278386982667e-08, |
|
"loss": 0.2603, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.9927484966395467e-08, |
|
"loss": 0.5623, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.966218606296427e-08, |
|
"loss": 0.2478, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.9396887159533076e-08, |
|
"loss": 0.2938, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.913158825610187e-08, |
|
"loss": 0.3001, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.886628935267067e-08, |
|
"loss": 0.1743, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.8600990449239474e-08, |
|
"loss": 0.3494, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.8335691545808276e-08, |
|
"loss": 0.3701, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.8070392642377084e-08, |
|
"loss": 0.6493, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.780509373894588e-08, |
|
"loss": 0.2456, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.7539794835514673e-08, |
|
"loss": 0.1634, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.727449593208348e-08, |
|
"loss": 0.2219, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.700919702865228e-08, |
|
"loss": 0.2627, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.674389812522108e-08, |
|
"loss": 0.6152, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.647859922178988e-08, |
|
"loss": 0.273, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.621330031835868e-08, |
|
"loss": 0.2135, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.594800141492748e-08, |
|
"loss": 0.2077, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.5682702511496284e-08, |
|
"loss": 0.6408, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.5417403608065085e-08, |
|
"loss": 0.1691, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.515210470463389e-08, |
|
"loss": 0.4814, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.488680580120269e-08, |
|
"loss": 0.1419, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.462150689777149e-08, |
|
"loss": 0.3493, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.435620799434029e-08, |
|
"loss": 0.5488, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.4090909090909086e-08, |
|
"loss": 0.4774, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.382561018747789e-08, |
|
"loss": 0.3423, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.356031128404669e-08, |
|
"loss": 0.3831, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.329501238061549e-08, |
|
"loss": 0.3576, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.302971347718429e-08, |
|
"loss": 0.2333, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.2764414573753094e-08, |
|
"loss": 0.2804, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.2499115670321895e-08, |
|
"loss": 0.2403, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.22338167668907e-08, |
|
"loss": 0.2754, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.19685178634595e-08, |
|
"loss": 0.3769, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.170321896002829e-08, |
|
"loss": 0.2209, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.1437920056597095e-08, |
|
"loss": 0.338, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.11726211531659e-08, |
|
"loss": 0.2673, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.09073222497347e-08, |
|
"loss": 0.2406, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.06420233463035e-08, |
|
"loss": 0.3665, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.03767244428723e-08, |
|
"loss": 0.1279, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.01114255394411e-08, |
|
"loss": 0.1948, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.9846126636009904e-08, |
|
"loss": 0.3244, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.9580827732578705e-08, |
|
"loss": 0.4653, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.9315528829147507e-08, |
|
"loss": 0.6083, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.9050229925716305e-08, |
|
"loss": 0.3703, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.8784931022285106e-08, |
|
"loss": 0.4843, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.8519632118853908e-08, |
|
"loss": 0.5002, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.8254333215422706e-08, |
|
"loss": 0.1321, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.7989034311991508e-08, |
|
"loss": 0.2233, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.7723735408560312e-08, |
|
"loss": 0.3006, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.745843650512911e-08, |
|
"loss": 0.5109, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.7193137601697912e-08, |
|
"loss": 0.5902, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.6927838698266714e-08, |
|
"loss": 0.4463, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.6662539794835512e-08, |
|
"loss": 0.1674, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.6397240891404313e-08, |
|
"loss": 0.3116, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.6131941987973115e-08, |
|
"loss": 0.5825, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.5866643084541913e-08, |
|
"loss": 0.2447, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.5601344181110718e-08, |
|
"loss": 0.5195, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.533604527767952e-08, |
|
"loss": 0.2391, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.5070746374248318e-08, |
|
"loss": 0.2846, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.480544747081712e-08, |
|
"loss": 0.1091, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.454014856738592e-08, |
|
"loss": 0.2265, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.4274849663954722e-08, |
|
"loss": 0.1529, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.400955076052352e-08, |
|
"loss": 0.1775, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.3744251857092322e-08, |
|
"loss": 0.2777, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.3478952953661127e-08, |
|
"loss": 0.1498, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.3213654050229925e-08, |
|
"loss": 0.0952, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.2948355146798726e-08, |
|
"loss": 0.2005, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.2683056243367528e-08, |
|
"loss": 0.4254, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.2417757339936326e-08, |
|
"loss": 0.5729, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.2152458436505127e-08, |
|
"loss": 0.2423, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.188715953307393e-08, |
|
"loss": 0.4267, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.1621860629642727e-08, |
|
"loss": 0.2208, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.1356561726211532e-08, |
|
"loss": 0.2322, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.1091262822780334e-08, |
|
"loss": 0.2109, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.0825963919349132e-08, |
|
"loss": 0.3221, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.0560665015917933e-08, |
|
"loss": 0.2768, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.0295366112486735e-08, |
|
"loss": 0.161, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.0030067209055533e-08, |
|
"loss": 0.4017, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.9764768305624334e-08, |
|
"loss": 0.4653, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.9499469402193136e-08, |
|
"loss": 0.3184, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.923417049876194e-08, |
|
"loss": 0.2338, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.896887159533074e-08, |
|
"loss": 0.5184, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.870357269189954e-08, |
|
"loss": 0.3949, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.843827378846834e-08, |
|
"loss": 0.2224, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.817297488503714e-08, |
|
"loss": 0.2273, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.790767598160594e-08, |
|
"loss": 0.4915, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.7642377078174743e-08, |
|
"loss": 0.2633, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.7377078174743545e-08, |
|
"loss": 0.4765, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.7111779271312346e-08, |
|
"loss": 0.3887, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.6846480367881144e-08, |
|
"loss": 0.4244, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.6581181464449946e-08, |
|
"loss": 0.3747, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.6315882561018747e-08, |
|
"loss": 0.2129, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.6050583657587546e-08, |
|
"loss": 0.2404, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.578528475415635e-08, |
|
"loss": 0.3692, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.551998585072515e-08, |
|
"loss": 0.3378, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.525468694729395e-08, |
|
"loss": 0.4417, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.498938804386275e-08, |
|
"loss": 0.3159, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.4724089140431551e-08, |
|
"loss": 0.4654, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.4458790237000355e-08, |
|
"loss": 0.2322, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.4193491333569154e-08, |
|
"loss": 0.2092, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.3928192430137954e-08, |
|
"loss": 0.2385, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.3662893526706756e-08, |
|
"loss": 0.3575, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.3397594623275557e-08, |
|
"loss": 0.2266, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.3132295719844357e-08, |
|
"loss": 0.234, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.2866996816413159e-08, |
|
"loss": 0.2282, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.2601697912981958e-08, |
|
"loss": 0.2577, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.2336399009550762e-08, |
|
"loss": 0.3597, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.2071100106119561e-08, |
|
"loss": 0.4429, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.1805801202688361e-08, |
|
"loss": 0.1702, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.1540502299257163e-08, |
|
"loss": 0.2193, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.1275203395825964e-08, |
|
"loss": 0.4391, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.1009904492394764e-08, |
|
"loss": 0.2284, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.0744605588963566e-08, |
|
"loss": 0.489, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.0479306685532365e-08, |
|
"loss": 0.1444, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.0214007782101165e-08, |
|
"loss": 0.2967, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 9.948708878669968e-09, |
|
"loss": 0.3594, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 9.683409975238768e-09, |
|
"loss": 0.5359, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 9.41811107180757e-09, |
|
"loss": 0.2757, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 9.152812168376371e-09, |
|
"loss": 0.3935, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 8.887513264945171e-09, |
|
"loss": 0.14, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 8.622214361513971e-09, |
|
"loss": 0.4049, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 8.356915458082773e-09, |
|
"loss": 0.2653, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 8.091616554651574e-09, |
|
"loss": 0.1909, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 7.826317651220374e-09, |
|
"loss": 0.2591, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 7.561018747789175e-09, |
|
"loss": 0.1013, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 7.295719844357976e-09, |
|
"loss": 0.1527, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 7.0304209409267776e-09, |
|
"loss": 0.1982, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 6.765122037495577e-09, |
|
"loss": 0.3196, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 6.499823134064379e-09, |
|
"loss": 0.2233, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 6.2345242306331796e-09, |
|
"loss": 0.2557, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 5.969225327201981e-09, |
|
"loss": 0.1804, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 5.703926423770781e-09, |
|
"loss": 0.2618, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 5.4386275203395824e-09, |
|
"loss": 0.4291, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 5.173328616908383e-09, |
|
"loss": 0.2799, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 4.908029713477184e-09, |
|
"loss": 0.1811, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 4.6427308100459845e-09, |
|
"loss": 0.2275, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 4.377431906614786e-09, |
|
"loss": 0.4325, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 4.112133003183587e-09, |
|
"loss": 0.22, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.846834099752387e-09, |
|
"loss": 0.22, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.5815351963211884e-09, |
|
"loss": 0.199, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.3162362928899895e-09, |
|
"loss": 0.4157, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.05093738945879e-09, |
|
"loss": 0.4612, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 2.7856384860275913e-09, |
|
"loss": 0.2144, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 2.520339582596392e-09, |
|
"loss": 0.3051, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 2.2550406791651926e-09, |
|
"loss": 0.4694, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.9897417757339937e-09, |
|
"loss": 0.5988, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.7244428723027944e-09, |
|
"loss": 0.1659, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.4591439688715953e-09, |
|
"loss": 0.4332, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.1938450654403961e-09, |
|
"loss": 0.2026, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 9.28546162009197e-10, |
|
"loss": 0.3017, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 6.632472585779978e-10, |
|
"loss": 0.2656, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.979483551467987e-10, |
|
"loss": 0.5015, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.3264945171559958e-10, |
|
"loss": 0.4006, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.8251666013328106, |
|
"eval_loss": 0.44296565651893616, |
|
"eval_runtime": 623.4351, |
|
"eval_samples_per_second": 4.092, |
|
"eval_steps_per_second": 1.023, |
|
"step": 12565 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"step": 12565, |
|
"total_flos": 2.2067541850155437e+20, |
|
"train_loss": 0.39015588782197014, |
|
"train_runtime": 24412.2348, |
|
"train_samples_per_second": 2.059, |
|
"train_steps_per_second": 0.515 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.8251666013328106, |
|
"eval_loss": 0.44296565651893616, |
|
"eval_runtime": 654.7979, |
|
"eval_samples_per_second": 3.896, |
|
"eval_steps_per_second": 0.974, |
|
"step": 12565 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.8251666013328106, |
|
"eval_loss": 0.44296565651893616, |
|
"eval_runtime": 750.9501, |
|
"eval_samples_per_second": 3.397, |
|
"eval_steps_per_second": 0.85, |
|
"step": 12565 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 12565, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 2.2067541850155437e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|