{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999764438436699, "eval_steps": 500, "global_step": 32833, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00030456444547975205, "grad_norm": 1712.0, "learning_rate": 2.0000000000000003e-06, "loss": 7.2821, "step": 10 }, { "epoch": 0.0006091288909595041, "grad_norm": 1624.0, "learning_rate": 4.000000000000001e-06, "loss": 7.281, "step": 20 }, { "epoch": 0.0009136933364392562, "grad_norm": 1336.0, "learning_rate": 6e-06, "loss": 7.1654, "step": 30 }, { "epoch": 0.0012182577819190082, "grad_norm": 1088.0, "learning_rate": 8.000000000000001e-06, "loss": 7.0065, "step": 40 }, { "epoch": 0.0015228222273987603, "grad_norm": 824.0, "learning_rate": 1e-05, "loss": 6.7471, "step": 50 }, { "epoch": 0.0018273866728785123, "grad_norm": 636.0, "learning_rate": 1.2e-05, "loss": 6.4372, "step": 60 }, { "epoch": 0.0021319511183582646, "grad_norm": 596.0, "learning_rate": 1.4000000000000001e-05, "loss": 6.2298, "step": 70 }, { "epoch": 0.0024365155638380164, "grad_norm": 402.0, "learning_rate": 1.6000000000000003e-05, "loss": 5.9743, "step": 80 }, { "epoch": 0.0027410800093177687, "grad_norm": 366.0, "learning_rate": 1.8e-05, "loss": 5.6511, "step": 90 }, { "epoch": 0.0030456444547975205, "grad_norm": 270.0, "learning_rate": 2e-05, "loss": 5.3713, "step": 100 }, { "epoch": 0.003350208900277273, "grad_norm": 200.0, "learning_rate": 2.2000000000000003e-05, "loss": 5.0917, "step": 110 }, { "epoch": 0.0036547733457570246, "grad_norm": 167.0, "learning_rate": 2.4e-05, "loss": 4.8868, "step": 120 }, { "epoch": 0.003959337791236777, "grad_norm": 138.0, "learning_rate": 2.6000000000000002e-05, "loss": 4.6662, "step": 130 }, { "epoch": 0.004263902236716529, "grad_norm": 112.0, "learning_rate": 2.8000000000000003e-05, "loss": 4.5005, "step": 140 }, { "epoch": 0.0045684666821962806, "grad_norm": 97.5, "learning_rate": 3e-05, "loss": 4.4035, "step": 150 }, { "epoch": 0.004873031127676033, "grad_norm": 98.0, "learning_rate": 3.2000000000000005e-05, "loss": 4.2933, "step": 160 }, { "epoch": 0.005177595573155785, "grad_norm": 84.5, "learning_rate": 3.4000000000000007e-05, "loss": 4.2046, "step": 170 }, { "epoch": 0.005482160018635537, "grad_norm": 75.0, "learning_rate": 3.6e-05, "loss": 4.1066, "step": 180 }, { "epoch": 0.005786724464115289, "grad_norm": 73.0, "learning_rate": 3.8e-05, "loss": 4.0418, "step": 190 }, { "epoch": 0.006091288909595041, "grad_norm": 55.75, "learning_rate": 4e-05, "loss": 3.9952, "step": 200 }, { "epoch": 0.006395853355074793, "grad_norm": 57.0, "learning_rate": 4.2e-05, "loss": 3.9604, "step": 210 }, { "epoch": 0.006700417800554546, "grad_norm": 54.5, "learning_rate": 4.4000000000000006e-05, "loss": 3.9196, "step": 220 }, { "epoch": 0.007004982246034297, "grad_norm": 67.0, "learning_rate": 4.600000000000001e-05, "loss": 3.9066, "step": 230 }, { "epoch": 0.007309546691514049, "grad_norm": 59.25, "learning_rate": 4.8e-05, "loss": 3.8473, "step": 240 }, { "epoch": 0.0076141111369938015, "grad_norm": 60.75, "learning_rate": 5e-05, "loss": 3.8279, "step": 250 }, { "epoch": 0.007918675582473554, "grad_norm": 58.0, "learning_rate": 5.2000000000000004e-05, "loss": 3.7672, "step": 260 }, { "epoch": 0.008223240027953306, "grad_norm": 53.0, "learning_rate": 5.4000000000000005e-05, "loss": 3.7729, "step": 270 }, { "epoch": 0.008527804473433058, "grad_norm": 56.5, "learning_rate": 5.6000000000000006e-05, "loss": 3.7447, "step": 280 }, { "epoch": 0.008832368918912809, "grad_norm": 57.75, "learning_rate": 5.8e-05, "loss": 3.7273, "step": 290 }, { "epoch": 0.009136933364392561, "grad_norm": 52.0, "learning_rate": 6e-05, "loss": 3.6868, "step": 300 }, { "epoch": 0.009441497809872313, "grad_norm": 46.75, "learning_rate": 6.2e-05, "loss": 3.6677, "step": 310 }, { "epoch": 0.009746062255352066, "grad_norm": 46.75, "learning_rate": 6.400000000000001e-05, "loss": 3.6275, "step": 320 }, { "epoch": 0.010050626700831818, "grad_norm": 49.75, "learning_rate": 6.6e-05, "loss": 3.6204, "step": 330 }, { "epoch": 0.01035519114631157, "grad_norm": 47.75, "learning_rate": 6.800000000000001e-05, "loss": 3.6154, "step": 340 }, { "epoch": 0.010659755591791322, "grad_norm": 53.25, "learning_rate": 7e-05, "loss": 3.6029, "step": 350 }, { "epoch": 0.010964320037271075, "grad_norm": 46.0, "learning_rate": 7.2e-05, "loss": 3.6168, "step": 360 }, { "epoch": 0.011268884482750825, "grad_norm": 47.5, "learning_rate": 7.4e-05, "loss": 3.555, "step": 370 }, { "epoch": 0.011573448928230578, "grad_norm": 47.5, "learning_rate": 7.6e-05, "loss": 3.5631, "step": 380 }, { "epoch": 0.01187801337371033, "grad_norm": 54.0, "learning_rate": 7.800000000000001e-05, "loss": 3.5768, "step": 390 }, { "epoch": 0.012182577819190082, "grad_norm": 47.75, "learning_rate": 8e-05, "loss": 3.5789, "step": 400 }, { "epoch": 0.012487142264669834, "grad_norm": 46.5, "learning_rate": 8.2e-05, "loss": 3.5292, "step": 410 }, { "epoch": 0.012791706710149587, "grad_norm": 50.0, "learning_rate": 8.4e-05, "loss": 3.5421, "step": 420 }, { "epoch": 0.013096271155629339, "grad_norm": 51.25, "learning_rate": 8.6e-05, "loss": 3.5277, "step": 430 }, { "epoch": 0.013400835601109091, "grad_norm": 41.75, "learning_rate": 8.800000000000001e-05, "loss": 3.4871, "step": 440 }, { "epoch": 0.013705400046588842, "grad_norm": 44.5, "learning_rate": 9e-05, "loss": 3.4888, "step": 450 }, { "epoch": 0.014009964492068594, "grad_norm": 44.5, "learning_rate": 9.200000000000001e-05, "loss": 3.4769, "step": 460 }, { "epoch": 0.014314528937548346, "grad_norm": 48.5, "learning_rate": 9.4e-05, "loss": 3.483, "step": 470 }, { "epoch": 0.014619093383028098, "grad_norm": 47.0, "learning_rate": 9.6e-05, "loss": 3.5106, "step": 480 }, { "epoch": 0.01492365782850785, "grad_norm": 45.75, "learning_rate": 9.8e-05, "loss": 3.4542, "step": 490 }, { "epoch": 0.015228222273987603, "grad_norm": 46.0, "learning_rate": 0.0001, "loss": 3.4654, "step": 500 }, { "epoch": 0.015532786719467355, "grad_norm": 44.25, "learning_rate": 0.00010200000000000001, "loss": 3.4498, "step": 510 }, { "epoch": 0.015837351164947108, "grad_norm": 45.25, "learning_rate": 0.00010400000000000001, "loss": 3.4726, "step": 520 }, { "epoch": 0.016141915610426858, "grad_norm": 45.0, "learning_rate": 0.00010600000000000002, "loss": 3.4338, "step": 530 }, { "epoch": 0.016446480055906612, "grad_norm": 47.75, "learning_rate": 0.00010800000000000001, "loss": 3.4394, "step": 540 }, { "epoch": 0.016751044501386363, "grad_norm": 45.5, "learning_rate": 0.00011000000000000002, "loss": 3.4434, "step": 550 }, { "epoch": 0.017055608946866117, "grad_norm": 44.75, "learning_rate": 0.00011200000000000001, "loss": 3.4578, "step": 560 }, { "epoch": 0.017360173392345867, "grad_norm": 50.5, "learning_rate": 0.00011399999999999999, "loss": 3.4284, "step": 570 }, { "epoch": 0.017664737837825618, "grad_norm": 61.25, "learning_rate": 0.000116, "loss": 3.4273, "step": 580 }, { "epoch": 0.01796930228330537, "grad_norm": 47.75, "learning_rate": 0.000118, "loss": 3.403, "step": 590 }, { "epoch": 0.018273866728785122, "grad_norm": 48.0, "learning_rate": 0.00012, "loss": 3.425, "step": 600 }, { "epoch": 0.018578431174264876, "grad_norm": 51.5, "learning_rate": 0.000122, "loss": 3.3928, "step": 610 }, { "epoch": 0.018882995619744627, "grad_norm": 54.0, "learning_rate": 0.000124, "loss": 3.3714, "step": 620 }, { "epoch": 0.01918756006522438, "grad_norm": 46.5, "learning_rate": 0.000126, "loss": 3.3776, "step": 630 }, { "epoch": 0.01949212451070413, "grad_norm": 43.25, "learning_rate": 0.00012800000000000002, "loss": 3.3798, "step": 640 }, { "epoch": 0.019796688956183885, "grad_norm": 53.25, "learning_rate": 0.00013000000000000002, "loss": 3.3848, "step": 650 }, { "epoch": 0.020101253401663636, "grad_norm": 46.5, "learning_rate": 0.000132, "loss": 3.4025, "step": 660 }, { "epoch": 0.020405817847143386, "grad_norm": 51.0, "learning_rate": 0.000134, "loss": 3.354, "step": 670 }, { "epoch": 0.02071038229262314, "grad_norm": 48.25, "learning_rate": 0.00013600000000000003, "loss": 3.4052, "step": 680 }, { "epoch": 0.02101494673810289, "grad_norm": 49.5, "learning_rate": 0.000138, "loss": 3.4084, "step": 690 }, { "epoch": 0.021319511183582645, "grad_norm": 54.0, "learning_rate": 0.00014, "loss": 3.3168, "step": 700 }, { "epoch": 0.021624075629062395, "grad_norm": 53.5, "learning_rate": 0.000142, "loss": 3.3556, "step": 710 }, { "epoch": 0.02192864007454215, "grad_norm": 56.25, "learning_rate": 0.000144, "loss": 3.3688, "step": 720 }, { "epoch": 0.0222332045200219, "grad_norm": 51.0, "learning_rate": 0.000146, "loss": 3.348, "step": 730 }, { "epoch": 0.02253776896550165, "grad_norm": 49.0, "learning_rate": 0.000148, "loss": 3.3495, "step": 740 }, { "epoch": 0.022842333410981405, "grad_norm": 56.5, "learning_rate": 0.00015000000000000001, "loss": 3.3517, "step": 750 }, { "epoch": 0.023146897856461155, "grad_norm": 49.0, "learning_rate": 0.000152, "loss": 3.3773, "step": 760 }, { "epoch": 0.02345146230194091, "grad_norm": 50.25, "learning_rate": 0.000154, "loss": 3.3664, "step": 770 }, { "epoch": 0.02375602674742066, "grad_norm": 48.0, "learning_rate": 0.00015600000000000002, "loss": 3.3329, "step": 780 }, { "epoch": 0.024060591192900414, "grad_norm": 43.25, "learning_rate": 0.00015800000000000002, "loss": 3.3333, "step": 790 }, { "epoch": 0.024365155638380164, "grad_norm": 46.5, "learning_rate": 0.00016, "loss": 3.3663, "step": 800 }, { "epoch": 0.024669720083859918, "grad_norm": 41.0, "learning_rate": 0.000162, "loss": 3.3054, "step": 810 }, { "epoch": 0.02497428452933967, "grad_norm": 44.25, "learning_rate": 0.000164, "loss": 3.352, "step": 820 }, { "epoch": 0.02527884897481942, "grad_norm": 48.5, "learning_rate": 0.000166, "loss": 3.351, "step": 830 }, { "epoch": 0.025583413420299173, "grad_norm": 49.0, "learning_rate": 0.000168, "loss": 3.3049, "step": 840 }, { "epoch": 0.025887977865778924, "grad_norm": 45.5, "learning_rate": 0.00017, "loss": 3.3288, "step": 850 }, { "epoch": 0.026192542311258678, "grad_norm": 41.5, "learning_rate": 0.000172, "loss": 3.3026, "step": 860 }, { "epoch": 0.02649710675673843, "grad_norm": 41.75, "learning_rate": 0.000174, "loss": 3.3539, "step": 870 }, { "epoch": 0.026801671202218182, "grad_norm": 48.75, "learning_rate": 0.00017600000000000002, "loss": 3.3316, "step": 880 }, { "epoch": 0.027106235647697933, "grad_norm": 46.25, "learning_rate": 0.00017800000000000002, "loss": 3.3352, "step": 890 }, { "epoch": 0.027410800093177683, "grad_norm": 45.0, "learning_rate": 0.00018, "loss": 3.3132, "step": 900 }, { "epoch": 0.027715364538657437, "grad_norm": 51.25, "learning_rate": 0.000182, "loss": 3.3133, "step": 910 }, { "epoch": 0.028019928984137188, "grad_norm": 45.0, "learning_rate": 0.00018400000000000003, "loss": 3.3192, "step": 920 }, { "epoch": 0.028324493429616942, "grad_norm": 44.75, "learning_rate": 0.00018600000000000002, "loss": 3.2938, "step": 930 }, { "epoch": 0.028629057875096692, "grad_norm": 45.75, "learning_rate": 0.000188, "loss": 3.2892, "step": 940 }, { "epoch": 0.028933622320576446, "grad_norm": 54.75, "learning_rate": 0.00019, "loss": 3.3209, "step": 950 }, { "epoch": 0.029238186766056197, "grad_norm": 62.5, "learning_rate": 0.000192, "loss": 3.294, "step": 960 }, { "epoch": 0.02954275121153595, "grad_norm": 46.25, "learning_rate": 0.000194, "loss": 3.2645, "step": 970 }, { "epoch": 0.0298473156570157, "grad_norm": 42.5, "learning_rate": 0.000196, "loss": 3.3449, "step": 980 }, { "epoch": 0.030151880102495452, "grad_norm": 44.75, "learning_rate": 0.00019800000000000002, "loss": 3.317, "step": 990 }, { "epoch": 0.030456444547975206, "grad_norm": 45.0, "learning_rate": 0.0002, "loss": 3.2821, "step": 1000 }, { "epoch": 0.030761008993454957, "grad_norm": 44.25, "learning_rate": 0.00019999995617145177, "loss": 3.3125, "step": 1010 }, { "epoch": 0.03106557343893471, "grad_norm": 43.25, "learning_rate": 0.00019999982468584977, "loss": 3.2814, "step": 1020 }, { "epoch": 0.031370137884414465, "grad_norm": 43.25, "learning_rate": 0.00019999960554332205, "loss": 3.3309, "step": 1030 }, { "epoch": 0.031674702329894215, "grad_norm": 45.25, "learning_rate": 0.00019999929874408206, "loss": 3.2843, "step": 1040 }, { "epoch": 0.031979266775373966, "grad_norm": 41.75, "learning_rate": 0.0001999989042884286, "loss": 3.3072, "step": 1050 }, { "epoch": 0.032283831220853716, "grad_norm": 42.25, "learning_rate": 0.00019999842217674585, "loss": 3.3001, "step": 1060 }, { "epoch": 0.03258839566633347, "grad_norm": 37.75, "learning_rate": 0.0001999978524095034, "loss": 3.2713, "step": 1070 }, { "epoch": 0.032892960111813224, "grad_norm": 42.75, "learning_rate": 0.00019999719498725618, "loss": 3.3007, "step": 1080 }, { "epoch": 0.033197524557292975, "grad_norm": 42.75, "learning_rate": 0.00019999644991064447, "loss": 3.2716, "step": 1090 }, { "epoch": 0.033502089002772725, "grad_norm": 44.75, "learning_rate": 0.000199995617180394, "loss": 3.3206, "step": 1100 }, { "epoch": 0.033806653448252476, "grad_norm": 38.5, "learning_rate": 0.00019999469679731578, "loss": 3.2675, "step": 1110 }, { "epoch": 0.03411121789373223, "grad_norm": 43.25, "learning_rate": 0.00019999368876230623, "loss": 3.3306, "step": 1120 }, { "epoch": 0.034415782339211984, "grad_norm": 43.5, "learning_rate": 0.00019999259307634714, "loss": 3.27, "step": 1130 }, { "epoch": 0.034720346784691734, "grad_norm": 41.5, "learning_rate": 0.00019999140974050572, "loss": 3.2667, "step": 1140 }, { "epoch": 0.035024911230171485, "grad_norm": 39.75, "learning_rate": 0.0001999901387559345, "loss": 3.2915, "step": 1150 }, { "epoch": 0.035329475675651235, "grad_norm": 41.25, "learning_rate": 0.0001999887801238713, "loss": 3.2601, "step": 1160 }, { "epoch": 0.03563404012113099, "grad_norm": 40.75, "learning_rate": 0.00019998733384563947, "loss": 3.2591, "step": 1170 }, { "epoch": 0.03593860456661074, "grad_norm": 39.0, "learning_rate": 0.00019998579992264753, "loss": 3.2152, "step": 1180 }, { "epoch": 0.036243169012090494, "grad_norm": 42.5, "learning_rate": 0.00019998417835638964, "loss": 3.2722, "step": 1190 }, { "epoch": 0.036547733457570244, "grad_norm": 39.5, "learning_rate": 0.000199982469148445, "loss": 3.2906, "step": 1200 }, { "epoch": 0.03685229790305, "grad_norm": 39.5, "learning_rate": 0.0001999806723004784, "loss": 3.277, "step": 1210 }, { "epoch": 0.03715686234852975, "grad_norm": 36.0, "learning_rate": 0.00019997878781423986, "loss": 3.2514, "step": 1220 }, { "epoch": 0.0374614267940095, "grad_norm": 47.0, "learning_rate": 0.0001999768156915649, "loss": 3.2341, "step": 1230 }, { "epoch": 0.037765991239489254, "grad_norm": 36.5, "learning_rate": 0.0001999747559343742, "loss": 3.2794, "step": 1240 }, { "epoch": 0.038070555684969004, "grad_norm": 37.25, "learning_rate": 0.00019997260854467396, "loss": 3.2759, "step": 1250 }, { "epoch": 0.03837512013044876, "grad_norm": 36.25, "learning_rate": 0.00019997037352455566, "loss": 3.2755, "step": 1260 }, { "epoch": 0.03867968457592851, "grad_norm": 46.75, "learning_rate": 0.0001999680508761961, "loss": 3.2274, "step": 1270 }, { "epoch": 0.03898424902140826, "grad_norm": 38.75, "learning_rate": 0.0001999656406018575, "loss": 3.2493, "step": 1280 }, { "epoch": 0.03928881346688801, "grad_norm": 39.5, "learning_rate": 0.00019996314270388739, "loss": 3.2274, "step": 1290 }, { "epoch": 0.03959337791236777, "grad_norm": 36.75, "learning_rate": 0.00019996055718471861, "loss": 3.2471, "step": 1300 }, { "epoch": 0.03989794235784752, "grad_norm": 39.0, "learning_rate": 0.00019995788404686944, "loss": 3.2375, "step": 1310 }, { "epoch": 0.04020250680332727, "grad_norm": 40.0, "learning_rate": 0.00019995512329294338, "loss": 3.2286, "step": 1320 }, { "epoch": 0.04050707124880702, "grad_norm": 38.75, "learning_rate": 0.00019995227492562929, "loss": 3.252, "step": 1330 }, { "epoch": 0.04081163569428677, "grad_norm": 37.5, "learning_rate": 0.00019994933894770147, "loss": 3.268, "step": 1340 }, { "epoch": 0.04111620013976653, "grad_norm": 38.0, "learning_rate": 0.00019994631536201935, "loss": 3.2373, "step": 1350 }, { "epoch": 0.04142076458524628, "grad_norm": 38.5, "learning_rate": 0.00019994320417152792, "loss": 3.2742, "step": 1360 }, { "epoch": 0.04172532903072603, "grad_norm": 40.5, "learning_rate": 0.0001999400053792573, "loss": 3.2384, "step": 1370 }, { "epoch": 0.04202989347620578, "grad_norm": 34.5, "learning_rate": 0.00019993671898832307, "loss": 3.1999, "step": 1380 }, { "epoch": 0.04233445792168553, "grad_norm": 40.5, "learning_rate": 0.00019993334500192605, "loss": 3.1946, "step": 1390 }, { "epoch": 0.04263902236716529, "grad_norm": 35.25, "learning_rate": 0.00019992988342335232, "loss": 3.2222, "step": 1400 }, { "epoch": 0.04294358681264504, "grad_norm": 36.0, "learning_rate": 0.00019992633425597347, "loss": 3.2409, "step": 1410 }, { "epoch": 0.04324815125812479, "grad_norm": 37.0, "learning_rate": 0.0001999226975032462, "loss": 3.226, "step": 1420 }, { "epoch": 0.04355271570360454, "grad_norm": 36.5, "learning_rate": 0.0001999189731687126, "loss": 3.2183, "step": 1430 }, { "epoch": 0.0438572801490843, "grad_norm": 39.0, "learning_rate": 0.00019991516125600006, "loss": 3.2243, "step": 1440 }, { "epoch": 0.04416184459456405, "grad_norm": 36.0, "learning_rate": 0.00019991126176882124, "loss": 3.1994, "step": 1450 }, { "epoch": 0.0444664090400438, "grad_norm": 39.75, "learning_rate": 0.00019990727471097417, "loss": 3.2232, "step": 1460 }, { "epoch": 0.04477097348552355, "grad_norm": 35.75, "learning_rate": 0.00019990320008634203, "loss": 3.1689, "step": 1470 }, { "epoch": 0.0450755379310033, "grad_norm": 36.0, "learning_rate": 0.00019989903789889346, "loss": 3.2198, "step": 1480 }, { "epoch": 0.04538010237648306, "grad_norm": 35.25, "learning_rate": 0.00019989478815268225, "loss": 3.21, "step": 1490 }, { "epoch": 0.04568466682196281, "grad_norm": 40.5, "learning_rate": 0.00019989045085184749, "loss": 3.2125, "step": 1500 }, { "epoch": 0.04598923126744256, "grad_norm": 35.25, "learning_rate": 0.0001998860260006136, "loss": 3.1754, "step": 1510 }, { "epoch": 0.04629379571292231, "grad_norm": 36.75, "learning_rate": 0.00019988151360329027, "loss": 3.1852, "step": 1520 }, { "epoch": 0.04659836015840207, "grad_norm": 39.0, "learning_rate": 0.00019987691366427242, "loss": 3.183, "step": 1530 }, { "epoch": 0.04690292460388182, "grad_norm": 37.25, "learning_rate": 0.0001998722261880402, "loss": 3.1532, "step": 1540 }, { "epoch": 0.04720748904936157, "grad_norm": 35.5, "learning_rate": 0.00019986745117915909, "loss": 3.2279, "step": 1550 }, { "epoch": 0.04751205349484132, "grad_norm": 34.75, "learning_rate": 0.0001998625886422798, "loss": 3.204, "step": 1560 }, { "epoch": 0.04781661794032107, "grad_norm": 39.25, "learning_rate": 0.0001998576385821383, "loss": 3.2071, "step": 1570 }, { "epoch": 0.04812118238580083, "grad_norm": 39.25, "learning_rate": 0.00019985260100355572, "loss": 3.2144, "step": 1580 }, { "epoch": 0.04842574683128058, "grad_norm": 36.25, "learning_rate": 0.00019984747591143855, "loss": 3.1855, "step": 1590 }, { "epoch": 0.04873031127676033, "grad_norm": 35.5, "learning_rate": 0.00019984226331077847, "loss": 3.194, "step": 1600 }, { "epoch": 0.04903487572224008, "grad_norm": 39.0, "learning_rate": 0.00019983696320665238, "loss": 3.1695, "step": 1610 }, { "epoch": 0.049339440167719836, "grad_norm": 39.25, "learning_rate": 0.00019983157560422239, "loss": 3.2022, "step": 1620 }, { "epoch": 0.04964400461319959, "grad_norm": 38.25, "learning_rate": 0.0001998261005087359, "loss": 3.1826, "step": 1630 }, { "epoch": 0.04994856905867934, "grad_norm": 35.75, "learning_rate": 0.0001998205379255254, "loss": 3.1969, "step": 1640 }, { "epoch": 0.05025313350415909, "grad_norm": 36.5, "learning_rate": 0.0001998148878600087, "loss": 3.2267, "step": 1650 }, { "epoch": 0.05055769794963884, "grad_norm": 35.25, "learning_rate": 0.0001998091503176888, "loss": 3.1961, "step": 1660 }, { "epoch": 0.050862262395118596, "grad_norm": 37.75, "learning_rate": 0.0001998033253041539, "loss": 3.1904, "step": 1670 }, { "epoch": 0.051166826840598346, "grad_norm": 38.75, "learning_rate": 0.0001997974128250773, "loss": 3.2038, "step": 1680 }, { "epoch": 0.0514713912860781, "grad_norm": 37.0, "learning_rate": 0.00019979141288621765, "loss": 3.1903, "step": 1690 }, { "epoch": 0.05177595573155785, "grad_norm": 35.25, "learning_rate": 0.00019978532549341863, "loss": 3.2023, "step": 1700 }, { "epoch": 0.052080520177037605, "grad_norm": 40.0, "learning_rate": 0.0001997791506526092, "loss": 3.2023, "step": 1710 }, { "epoch": 0.052385084622517356, "grad_norm": 35.5, "learning_rate": 0.00019977288836980344, "loss": 3.1343, "step": 1720 }, { "epoch": 0.052689649067997106, "grad_norm": 35.0, "learning_rate": 0.00019976653865110065, "loss": 3.159, "step": 1730 }, { "epoch": 0.05299421351347686, "grad_norm": 38.5, "learning_rate": 0.0001997601015026852, "loss": 3.2229, "step": 1740 }, { "epoch": 0.05329877795895661, "grad_norm": 37.75, "learning_rate": 0.00019975357693082662, "loss": 3.19, "step": 1750 }, { "epoch": 0.053603342404436365, "grad_norm": 34.5, "learning_rate": 0.00019974696494187979, "loss": 3.1826, "step": 1760 }, { "epoch": 0.053907906849916115, "grad_norm": 34.5, "learning_rate": 0.00019974026554228443, "loss": 3.1388, "step": 1770 }, { "epoch": 0.054212471295395866, "grad_norm": 39.5, "learning_rate": 0.0001997334787385656, "loss": 3.1633, "step": 1780 }, { "epoch": 0.054517035740875616, "grad_norm": 44.25, "learning_rate": 0.00019972660453733342, "loss": 3.1548, "step": 1790 }, { "epoch": 0.05482160018635537, "grad_norm": 35.5, "learning_rate": 0.00019971964294528312, "loss": 3.1515, "step": 1800 }, { "epoch": 0.055126164631835124, "grad_norm": 35.25, "learning_rate": 0.0001997125939691951, "loss": 3.187, "step": 1810 }, { "epoch": 0.055430729077314875, "grad_norm": 38.75, "learning_rate": 0.0001997054576159348, "loss": 3.2157, "step": 1820 }, { "epoch": 0.055735293522794625, "grad_norm": 35.0, "learning_rate": 0.00019969823389245285, "loss": 3.1873, "step": 1830 }, { "epoch": 0.056039857968274376, "grad_norm": 33.5, "learning_rate": 0.00019969092280578486, "loss": 3.1688, "step": 1840 }, { "epoch": 0.05634442241375413, "grad_norm": 36.75, "learning_rate": 0.00019968352436305163, "loss": 3.1707, "step": 1850 }, { "epoch": 0.056648986859233884, "grad_norm": 34.5, "learning_rate": 0.000199676038571459, "loss": 3.1595, "step": 1860 }, { "epoch": 0.056953551304713634, "grad_norm": 35.5, "learning_rate": 0.0001996684654382979, "loss": 3.2125, "step": 1870 }, { "epoch": 0.057258115750193385, "grad_norm": 38.0, "learning_rate": 0.00019966080497094428, "loss": 3.1673, "step": 1880 }, { "epoch": 0.057562680195673135, "grad_norm": 34.75, "learning_rate": 0.00019965305717685922, "loss": 3.1719, "step": 1890 }, { "epoch": 0.05786724464115289, "grad_norm": 38.25, "learning_rate": 0.00019964522206358882, "loss": 3.1636, "step": 1900 }, { "epoch": 0.05817180908663264, "grad_norm": 33.25, "learning_rate": 0.00019963729963876423, "loss": 3.1454, "step": 1910 }, { "epoch": 0.058476373532112394, "grad_norm": 33.25, "learning_rate": 0.0001996292899101016, "loss": 3.1837, "step": 1920 }, { "epoch": 0.058780937977592145, "grad_norm": 75.0, "learning_rate": 0.0001996211928854022, "loss": 3.1646, "step": 1930 }, { "epoch": 0.0590855024230719, "grad_norm": 35.0, "learning_rate": 0.00019961300857255224, "loss": 3.14, "step": 1940 }, { "epoch": 0.05939006686855165, "grad_norm": 35.75, "learning_rate": 0.00019960473697952296, "loss": 3.1535, "step": 1950 }, { "epoch": 0.0596946313140314, "grad_norm": 39.5, "learning_rate": 0.00019959637811437067, "loss": 3.1769, "step": 1960 }, { "epoch": 0.059999195759511154, "grad_norm": 33.25, "learning_rate": 0.0001995879319852366, "loss": 3.1531, "step": 1970 }, { "epoch": 0.060303760204990904, "grad_norm": 34.75, "learning_rate": 0.00019957939860034703, "loss": 3.1818, "step": 1980 }, { "epoch": 0.06060832465047066, "grad_norm": 35.25, "learning_rate": 0.00019957077796801316, "loss": 3.1807, "step": 1990 }, { "epoch": 0.06091288909595041, "grad_norm": 35.25, "learning_rate": 0.00019956207009663127, "loss": 3.1156, "step": 2000 }, { "epoch": 0.06121745354143016, "grad_norm": 35.0, "learning_rate": 0.0001995532749946825, "loss": 3.1395, "step": 2010 }, { "epoch": 0.06152201798690991, "grad_norm": 37.0, "learning_rate": 0.00019954439267073305, "loss": 3.1671, "step": 2020 }, { "epoch": 0.06182658243238967, "grad_norm": 35.75, "learning_rate": 0.00019953542313343395, "loss": 3.1615, "step": 2030 }, { "epoch": 0.06213114687786942, "grad_norm": 37.5, "learning_rate": 0.00019952636639152124, "loss": 3.1475, "step": 2040 }, { "epoch": 0.06243571132334917, "grad_norm": 37.0, "learning_rate": 0.00019951722245381591, "loss": 3.0898, "step": 2050 }, { "epoch": 0.06274027576882893, "grad_norm": 34.75, "learning_rate": 0.00019950799132922392, "loss": 3.1452, "step": 2060 }, { "epoch": 0.06304484021430867, "grad_norm": 34.25, "learning_rate": 0.000199498673026736, "loss": 3.1204, "step": 2070 }, { "epoch": 0.06334940465978843, "grad_norm": 37.25, "learning_rate": 0.00019948926755542797, "loss": 3.138, "step": 2080 }, { "epoch": 0.06365396910526817, "grad_norm": 35.0, "learning_rate": 0.00019947977492446034, "loss": 3.1509, "step": 2090 }, { "epoch": 0.06395853355074793, "grad_norm": 34.25, "learning_rate": 0.0001994701951430787, "loss": 3.1515, "step": 2100 }, { "epoch": 0.06426309799622769, "grad_norm": 34.5, "learning_rate": 0.00019946052822061348, "loss": 3.1454, "step": 2110 }, { "epoch": 0.06456766244170743, "grad_norm": 34.75, "learning_rate": 0.0001994507741664799, "loss": 3.1516, "step": 2120 }, { "epoch": 0.06487222688718719, "grad_norm": 36.5, "learning_rate": 0.00019944093299017806, "loss": 3.1689, "step": 2130 }, { "epoch": 0.06517679133266693, "grad_norm": 34.25, "learning_rate": 0.00019943100470129298, "loss": 3.1544, "step": 2140 }, { "epoch": 0.06548135577814669, "grad_norm": 35.0, "learning_rate": 0.00019942098930949453, "loss": 3.1539, "step": 2150 }, { "epoch": 0.06578592022362645, "grad_norm": 34.5, "learning_rate": 0.00019941088682453735, "loss": 3.1385, "step": 2160 }, { "epoch": 0.06609048466910619, "grad_norm": 32.0, "learning_rate": 0.00019940069725626092, "loss": 3.118, "step": 2170 }, { "epoch": 0.06639504911458595, "grad_norm": 33.25, "learning_rate": 0.00019939042061458957, "loss": 3.1235, "step": 2180 }, { "epoch": 0.06669961356006571, "grad_norm": 33.5, "learning_rate": 0.0001993800569095324, "loss": 3.1299, "step": 2190 }, { "epoch": 0.06700417800554545, "grad_norm": 34.0, "learning_rate": 0.00019936960615118336, "loss": 3.1457, "step": 2200 }, { "epoch": 0.06730874245102521, "grad_norm": 33.75, "learning_rate": 0.00019935906834972112, "loss": 3.1327, "step": 2210 }, { "epoch": 0.06761330689650495, "grad_norm": 34.5, "learning_rate": 0.00019934844351540915, "loss": 3.1755, "step": 2220 }, { "epoch": 0.06791787134198471, "grad_norm": 35.5, "learning_rate": 0.00019933773165859572, "loss": 3.1268, "step": 2230 }, { "epoch": 0.06822243578746447, "grad_norm": 32.5, "learning_rate": 0.00019932693278971385, "loss": 3.1656, "step": 2240 }, { "epoch": 0.06852700023294421, "grad_norm": 35.75, "learning_rate": 0.00019931604691928123, "loss": 3.1376, "step": 2250 }, { "epoch": 0.06883156467842397, "grad_norm": 33.5, "learning_rate": 0.0001993050740579004, "loss": 3.143, "step": 2260 }, { "epoch": 0.06913612912390371, "grad_norm": 32.75, "learning_rate": 0.00019929401421625855, "loss": 3.1173, "step": 2270 }, { "epoch": 0.06944069356938347, "grad_norm": 32.25, "learning_rate": 0.00019928286740512763, "loss": 3.1462, "step": 2280 }, { "epoch": 0.06974525801486323, "grad_norm": 34.25, "learning_rate": 0.00019927163363536425, "loss": 3.1486, "step": 2290 }, { "epoch": 0.07004982246034297, "grad_norm": 34.75, "learning_rate": 0.00019926031291790975, "loss": 3.0985, "step": 2300 }, { "epoch": 0.07035438690582273, "grad_norm": 34.5, "learning_rate": 0.0001992489052637901, "loss": 3.103, "step": 2310 }, { "epoch": 0.07065895135130247, "grad_norm": 34.0, "learning_rate": 0.00019923741068411609, "loss": 3.1244, "step": 2320 }, { "epoch": 0.07096351579678223, "grad_norm": 34.5, "learning_rate": 0.00019922582919008297, "loss": 3.1362, "step": 2330 }, { "epoch": 0.07126808024226199, "grad_norm": 33.0, "learning_rate": 0.00019921416079297078, "loss": 3.0869, "step": 2340 }, { "epoch": 0.07157264468774173, "grad_norm": 33.25, "learning_rate": 0.00019920240550414415, "loss": 3.1218, "step": 2350 }, { "epoch": 0.07187720913322149, "grad_norm": 35.75, "learning_rate": 0.0001991905633350524, "loss": 3.1192, "step": 2360 }, { "epoch": 0.07218177357870123, "grad_norm": 34.0, "learning_rate": 0.00019917863429722937, "loss": 3.167, "step": 2370 }, { "epoch": 0.07248633802418099, "grad_norm": 34.0, "learning_rate": 0.0001991666184022936, "loss": 3.1567, "step": 2380 }, { "epoch": 0.07279090246966075, "grad_norm": 34.75, "learning_rate": 0.0001991545156619481, "loss": 3.136, "step": 2390 }, { "epoch": 0.07309546691514049, "grad_norm": 33.25, "learning_rate": 0.00019914232608798065, "loss": 3.1186, "step": 2400 }, { "epoch": 0.07340003136062025, "grad_norm": 36.25, "learning_rate": 0.00019913004969226343, "loss": 3.1187, "step": 2410 }, { "epoch": 0.0737045958061, "grad_norm": 35.25, "learning_rate": 0.0001991176864867533, "loss": 3.1112, "step": 2420 }, { "epoch": 0.07400916025157975, "grad_norm": 34.75, "learning_rate": 0.0001991052364834916, "loss": 3.0985, "step": 2430 }, { "epoch": 0.0743137246970595, "grad_norm": 33.25, "learning_rate": 0.00019909269969460423, "loss": 3.1411, "step": 2440 }, { "epoch": 0.07461828914253925, "grad_norm": 36.75, "learning_rate": 0.0001990800761323016, "loss": 3.096, "step": 2450 }, { "epoch": 0.074922853588019, "grad_norm": 34.5, "learning_rate": 0.00019906736580887872, "loss": 3.1146, "step": 2460 }, { "epoch": 0.07522741803349876, "grad_norm": 34.25, "learning_rate": 0.00019905456873671497, "loss": 3.1133, "step": 2470 }, { "epoch": 0.07553198247897851, "grad_norm": 32.75, "learning_rate": 0.00019904168492827432, "loss": 3.1001, "step": 2480 }, { "epoch": 0.07583654692445826, "grad_norm": 35.75, "learning_rate": 0.00019902871439610514, "loss": 3.1271, "step": 2490 }, { "epoch": 0.07614111136993801, "grad_norm": 34.25, "learning_rate": 0.00019901565715284034, "loss": 3.1109, "step": 2500 }, { "epoch": 0.07644567581541777, "grad_norm": 32.25, "learning_rate": 0.0001990025132111973, "loss": 3.1028, "step": 2510 }, { "epoch": 0.07675024026089752, "grad_norm": 32.25, "learning_rate": 0.00019898928258397768, "loss": 3.1352, "step": 2520 }, { "epoch": 0.07705480470637727, "grad_norm": 32.0, "learning_rate": 0.00019897596528406782, "loss": 3.1331, "step": 2530 }, { "epoch": 0.07735936915185702, "grad_norm": 34.75, "learning_rate": 0.00019896256132443822, "loss": 3.1181, "step": 2540 }, { "epoch": 0.07766393359733677, "grad_norm": 33.75, "learning_rate": 0.00019894907071814398, "loss": 3.1269, "step": 2550 }, { "epoch": 0.07796849804281653, "grad_norm": 32.75, "learning_rate": 0.0001989354934783245, "loss": 3.1114, "step": 2560 }, { "epoch": 0.07827306248829628, "grad_norm": 33.75, "learning_rate": 0.00019892182961820355, "loss": 3.1135, "step": 2570 }, { "epoch": 0.07857762693377603, "grad_norm": 33.75, "learning_rate": 0.0001989080791510893, "loss": 3.1062, "step": 2580 }, { "epoch": 0.07888219137925578, "grad_norm": 32.0, "learning_rate": 0.00019889424209037427, "loss": 3.0826, "step": 2590 }, { "epoch": 0.07918675582473554, "grad_norm": 33.5, "learning_rate": 0.00019888031844953532, "loss": 3.134, "step": 2600 }, { "epoch": 0.07949132027021528, "grad_norm": 35.0, "learning_rate": 0.00019886630824213363, "loss": 3.1096, "step": 2610 }, { "epoch": 0.07979588471569504, "grad_norm": 33.25, "learning_rate": 0.00019885221148181466, "loss": 3.092, "step": 2620 }, { "epoch": 0.08010044916117479, "grad_norm": 31.375, "learning_rate": 0.0001988380281823082, "loss": 3.0738, "step": 2630 }, { "epoch": 0.08040501360665454, "grad_norm": 34.5, "learning_rate": 0.00019882375835742836, "loss": 3.0963, "step": 2640 }, { "epoch": 0.0807095780521343, "grad_norm": 32.25, "learning_rate": 0.00019880940202107343, "loss": 3.0982, "step": 2650 }, { "epoch": 0.08101414249761404, "grad_norm": 35.25, "learning_rate": 0.00019879495918722611, "loss": 3.1072, "step": 2660 }, { "epoch": 0.0813187069430938, "grad_norm": 35.0, "learning_rate": 0.00019878042986995316, "loss": 3.1028, "step": 2670 }, { "epoch": 0.08162327138857355, "grad_norm": 31.875, "learning_rate": 0.00019876581408340572, "loss": 3.1351, "step": 2680 }, { "epoch": 0.0819278358340533, "grad_norm": 34.0, "learning_rate": 0.0001987511118418191, "loss": 3.1008, "step": 2690 }, { "epoch": 0.08223240027953306, "grad_norm": 32.75, "learning_rate": 0.00019873632315951277, "loss": 3.126, "step": 2700 }, { "epoch": 0.0825369647250128, "grad_norm": 32.5, "learning_rate": 0.00019872144805089048, "loss": 3.1151, "step": 2710 }, { "epoch": 0.08284152917049256, "grad_norm": 34.25, "learning_rate": 0.00019870648653044006, "loss": 3.0971, "step": 2720 }, { "epoch": 0.0831460936159723, "grad_norm": 32.5, "learning_rate": 0.00019869143861273356, "loss": 3.0719, "step": 2730 }, { "epoch": 0.08345065806145206, "grad_norm": 32.75, "learning_rate": 0.00019867630431242715, "loss": 3.1066, "step": 2740 }, { "epoch": 0.08375522250693182, "grad_norm": 31.875, "learning_rate": 0.0001986610836442612, "loss": 3.0575, "step": 2750 }, { "epoch": 0.08405978695241156, "grad_norm": 32.5, "learning_rate": 0.0001986457766230601, "loss": 3.1117, "step": 2760 }, { "epoch": 0.08436435139789132, "grad_norm": 35.25, "learning_rate": 0.00019863038326373248, "loss": 3.1244, "step": 2770 }, { "epoch": 0.08466891584337106, "grad_norm": 33.0, "learning_rate": 0.00019861490358127085, "loss": 3.0879, "step": 2780 }, { "epoch": 0.08497348028885082, "grad_norm": 32.75, "learning_rate": 0.000198599337590752, "loss": 3.0994, "step": 2790 }, { "epoch": 0.08527804473433058, "grad_norm": 32.5, "learning_rate": 0.00019858368530733668, "loss": 3.0765, "step": 2800 }, { "epoch": 0.08558260917981032, "grad_norm": 32.25, "learning_rate": 0.0001985679467462697, "loss": 3.1, "step": 2810 }, { "epoch": 0.08588717362529008, "grad_norm": 33.5, "learning_rate": 0.00019855212192287997, "loss": 3.099, "step": 2820 }, { "epoch": 0.08619173807076984, "grad_norm": 33.75, "learning_rate": 0.0001985362108525803, "loss": 3.0653, "step": 2830 }, { "epoch": 0.08649630251624958, "grad_norm": 32.25, "learning_rate": 0.00019852021355086754, "loss": 3.0789, "step": 2840 }, { "epoch": 0.08680086696172934, "grad_norm": 34.5, "learning_rate": 0.0001985041300333226, "loss": 3.1207, "step": 2850 }, { "epoch": 0.08710543140720908, "grad_norm": 32.75, "learning_rate": 0.00019848796031561027, "loss": 3.0963, "step": 2860 }, { "epoch": 0.08740999585268884, "grad_norm": 33.75, "learning_rate": 0.00019847170441347937, "loss": 3.0948, "step": 2870 }, { "epoch": 0.0877145602981686, "grad_norm": 32.75, "learning_rate": 0.0001984553623427626, "loss": 3.076, "step": 2880 }, { "epoch": 0.08801912474364834, "grad_norm": 33.25, "learning_rate": 0.00019843893411937663, "loss": 3.0588, "step": 2890 }, { "epoch": 0.0883236891891281, "grad_norm": 32.75, "learning_rate": 0.00019842241975932197, "loss": 3.0662, "step": 2900 }, { "epoch": 0.08862825363460784, "grad_norm": 32.25, "learning_rate": 0.0001984058192786831, "loss": 3.125, "step": 2910 }, { "epoch": 0.0889328180800876, "grad_norm": 32.0, "learning_rate": 0.00019838913269362837, "loss": 3.0749, "step": 2920 }, { "epoch": 0.08923738252556736, "grad_norm": 32.25, "learning_rate": 0.00019837236002040999, "loss": 3.1095, "step": 2930 }, { "epoch": 0.0895419469710471, "grad_norm": 37.0, "learning_rate": 0.00019835550127536393, "loss": 3.0994, "step": 2940 }, { "epoch": 0.08984651141652686, "grad_norm": 32.5, "learning_rate": 0.00019833855647491015, "loss": 3.0878, "step": 2950 }, { "epoch": 0.0901510758620066, "grad_norm": 34.0, "learning_rate": 0.00019832152563555228, "loss": 3.0345, "step": 2960 }, { "epoch": 0.09045564030748636, "grad_norm": 34.0, "learning_rate": 0.00019830440877387784, "loss": 3.0811, "step": 2970 }, { "epoch": 0.09076020475296612, "grad_norm": 33.5, "learning_rate": 0.00019828720590655806, "loss": 3.0812, "step": 2980 }, { "epoch": 0.09106476919844586, "grad_norm": 32.25, "learning_rate": 0.00019826991705034804, "loss": 3.1128, "step": 2990 }, { "epoch": 0.09136933364392562, "grad_norm": 33.75, "learning_rate": 0.00019825254222208648, "loss": 3.0806, "step": 3000 }, { "epoch": 0.09167389808940538, "grad_norm": 32.0, "learning_rate": 0.000198235081438696, "loss": 3.086, "step": 3010 }, { "epoch": 0.09197846253488512, "grad_norm": 36.0, "learning_rate": 0.0001982175347171828, "loss": 3.0764, "step": 3020 }, { "epoch": 0.09228302698036488, "grad_norm": 33.5, "learning_rate": 0.00019819990207463683, "loss": 3.0554, "step": 3030 }, { "epoch": 0.09258759142584462, "grad_norm": 33.0, "learning_rate": 0.00019818218352823168, "loss": 3.0732, "step": 3040 }, { "epoch": 0.09289215587132438, "grad_norm": 31.75, "learning_rate": 0.00019816437909522466, "loss": 3.0719, "step": 3050 }, { "epoch": 0.09319672031680414, "grad_norm": 34.0, "learning_rate": 0.00019814648879295672, "loss": 3.1172, "step": 3060 }, { "epoch": 0.09350128476228388, "grad_norm": 35.25, "learning_rate": 0.00019812851263885245, "loss": 3.091, "step": 3070 }, { "epoch": 0.09380584920776364, "grad_norm": 33.5, "learning_rate": 0.00019811045065042002, "loss": 3.0852, "step": 3080 }, { "epoch": 0.09411041365324338, "grad_norm": 36.25, "learning_rate": 0.0001980923028452512, "loss": 3.0486, "step": 3090 }, { "epoch": 0.09441497809872314, "grad_norm": 36.0, "learning_rate": 0.00019807406924102146, "loss": 3.0647, "step": 3100 }, { "epoch": 0.0947195425442029, "grad_norm": 33.75, "learning_rate": 0.00019805574985548965, "loss": 3.0784, "step": 3110 }, { "epoch": 0.09502410698968264, "grad_norm": 33.25, "learning_rate": 0.00019803734470649832, "loss": 3.0206, "step": 3120 }, { "epoch": 0.0953286714351624, "grad_norm": 33.75, "learning_rate": 0.00019801885381197343, "loss": 3.0644, "step": 3130 }, { "epoch": 0.09563323588064214, "grad_norm": 31.875, "learning_rate": 0.00019800027718992457, "loss": 3.0825, "step": 3140 }, { "epoch": 0.0959378003261219, "grad_norm": 33.75, "learning_rate": 0.00019798161485844472, "loss": 3.0647, "step": 3150 }, { "epoch": 0.09624236477160165, "grad_norm": 34.25, "learning_rate": 0.00019796286683571046, "loss": 3.0852, "step": 3160 }, { "epoch": 0.0965469292170814, "grad_norm": 32.25, "learning_rate": 0.0001979440331399817, "loss": 3.0577, "step": 3170 }, { "epoch": 0.09685149366256116, "grad_norm": 33.25, "learning_rate": 0.00019792511378960185, "loss": 3.0702, "step": 3180 }, { "epoch": 0.0971560581080409, "grad_norm": 33.0, "learning_rate": 0.00019790610880299778, "loss": 3.0809, "step": 3190 }, { "epoch": 0.09746062255352066, "grad_norm": 34.25, "learning_rate": 0.00019788701819867973, "loss": 3.0722, "step": 3200 }, { "epoch": 0.09776518699900041, "grad_norm": 110.0, "learning_rate": 0.0001978678419952413, "loss": 3.0773, "step": 3210 }, { "epoch": 0.09806975144448016, "grad_norm": 33.25, "learning_rate": 0.00019784858021135954, "loss": 3.1056, "step": 3220 }, { "epoch": 0.09837431588995992, "grad_norm": 32.25, "learning_rate": 0.00019782923286579477, "loss": 3.0536, "step": 3230 }, { "epoch": 0.09867888033543967, "grad_norm": 36.5, "learning_rate": 0.0001978097999773907, "loss": 3.0936, "step": 3240 }, { "epoch": 0.09898344478091942, "grad_norm": 35.25, "learning_rate": 0.00019779028156507433, "loss": 3.0971, "step": 3250 }, { "epoch": 0.09928800922639917, "grad_norm": 32.5, "learning_rate": 0.00019777067764785599, "loss": 3.089, "step": 3260 }, { "epoch": 0.09959257367187892, "grad_norm": 32.25, "learning_rate": 0.00019775098824482919, "loss": 3.0739, "step": 3270 }, { "epoch": 0.09989713811735867, "grad_norm": 35.25, "learning_rate": 0.0001977312133751708, "loss": 3.0654, "step": 3280 }, { "epoch": 0.10020170256283843, "grad_norm": 31.875, "learning_rate": 0.00019771135305814098, "loss": 3.0773, "step": 3290 }, { "epoch": 0.10050626700831818, "grad_norm": 33.75, "learning_rate": 0.00019769140731308298, "loss": 3.0472, "step": 3300 }, { "epoch": 0.10081083145379793, "grad_norm": 36.25, "learning_rate": 0.00019767137615942328, "loss": 3.0758, "step": 3310 }, { "epoch": 0.10111539589927768, "grad_norm": 33.0, "learning_rate": 0.00019765125961667166, "loss": 3.0436, "step": 3320 }, { "epoch": 0.10141996034475743, "grad_norm": 32.0, "learning_rate": 0.00019763105770442086, "loss": 3.0214, "step": 3330 }, { "epoch": 0.10172452479023719, "grad_norm": 33.25, "learning_rate": 0.00019761077044234705, "loss": 3.0685, "step": 3340 }, { "epoch": 0.10202908923571694, "grad_norm": 32.25, "learning_rate": 0.00019759039785020925, "loss": 3.0611, "step": 3350 }, { "epoch": 0.10233365368119669, "grad_norm": 33.0, "learning_rate": 0.00019756993994784973, "loss": 3.0609, "step": 3360 }, { "epoch": 0.10263821812667644, "grad_norm": 31.5, "learning_rate": 0.00019754939675519387, "loss": 3.071, "step": 3370 }, { "epoch": 0.1029427825721562, "grad_norm": 34.25, "learning_rate": 0.00019752876829225003, "loss": 3.0651, "step": 3380 }, { "epoch": 0.10324734701763595, "grad_norm": 32.75, "learning_rate": 0.00019750805457910967, "loss": 3.1059, "step": 3390 }, { "epoch": 0.1035519114631157, "grad_norm": 31.25, "learning_rate": 0.0001974872556359473, "loss": 3.1012, "step": 3400 }, { "epoch": 0.10385647590859545, "grad_norm": 33.0, "learning_rate": 0.00019746637148302043, "loss": 3.0897, "step": 3410 }, { "epoch": 0.10416104035407521, "grad_norm": 33.5, "learning_rate": 0.0001974454021406695, "loss": 3.0654, "step": 3420 }, { "epoch": 0.10446560479955495, "grad_norm": 32.0, "learning_rate": 0.00019742434762931804, "loss": 3.0539, "step": 3430 }, { "epoch": 0.10477016924503471, "grad_norm": 30.875, "learning_rate": 0.00019740320796947242, "loss": 3.1112, "step": 3440 }, { "epoch": 0.10507473369051445, "grad_norm": 32.5, "learning_rate": 0.000197381983181722, "loss": 3.0907, "step": 3450 }, { "epoch": 0.10537929813599421, "grad_norm": 35.0, "learning_rate": 0.00019736067328673905, "loss": 3.0714, "step": 3460 }, { "epoch": 0.10568386258147397, "grad_norm": 31.375, "learning_rate": 0.0001973392783052787, "loss": 3.0722, "step": 3470 }, { "epoch": 0.10598842702695371, "grad_norm": 33.0, "learning_rate": 0.00019731779825817893, "loss": 3.0888, "step": 3480 }, { "epoch": 0.10629299147243347, "grad_norm": 34.5, "learning_rate": 0.00019729623316636072, "loss": 3.0933, "step": 3490 }, { "epoch": 0.10659755591791321, "grad_norm": 33.0, "learning_rate": 0.0001972745830508277, "loss": 3.0495, "step": 3500 }, { "epoch": 0.10690212036339297, "grad_norm": 32.5, "learning_rate": 0.00019725284793266638, "loss": 3.0204, "step": 3510 }, { "epoch": 0.10720668480887273, "grad_norm": 32.25, "learning_rate": 0.0001972310278330461, "loss": 3.089, "step": 3520 }, { "epoch": 0.10751124925435247, "grad_norm": 30.5, "learning_rate": 0.00019720912277321892, "loss": 3.0286, "step": 3530 }, { "epoch": 0.10781581369983223, "grad_norm": 33.75, "learning_rate": 0.00019718713277451963, "loss": 3.0335, "step": 3540 }, { "epoch": 0.10812037814531197, "grad_norm": 33.25, "learning_rate": 0.00019716505785836584, "loss": 3.036, "step": 3550 }, { "epoch": 0.10842494259079173, "grad_norm": 32.5, "learning_rate": 0.00019714289804625775, "loss": 3.0345, "step": 3560 }, { "epoch": 0.10872950703627149, "grad_norm": 34.0, "learning_rate": 0.00019712065335977837, "loss": 3.0572, "step": 3570 }, { "epoch": 0.10903407148175123, "grad_norm": 32.5, "learning_rate": 0.0001970983238205932, "loss": 3.0389, "step": 3580 }, { "epoch": 0.10933863592723099, "grad_norm": 31.875, "learning_rate": 0.00019707590945045058, "loss": 3.0689, "step": 3590 }, { "epoch": 0.10964320037271073, "grad_norm": 31.25, "learning_rate": 0.00019705341027118135, "loss": 3.0604, "step": 3600 }, { "epoch": 0.10994776481819049, "grad_norm": 31.375, "learning_rate": 0.000197030826304699, "loss": 3.0623, "step": 3610 }, { "epoch": 0.11025232926367025, "grad_norm": 33.75, "learning_rate": 0.0001970081575729996, "loss": 3.0776, "step": 3620 }, { "epoch": 0.11055689370914999, "grad_norm": 31.5, "learning_rate": 0.0001969854040981617, "loss": 3.0361, "step": 3630 }, { "epoch": 0.11086145815462975, "grad_norm": 33.25, "learning_rate": 0.0001969625659023465, "loss": 3.0564, "step": 3640 }, { "epoch": 0.11116602260010951, "grad_norm": 33.5, "learning_rate": 0.00019693964300779765, "loss": 3.079, "step": 3650 }, { "epoch": 0.11147058704558925, "grad_norm": 33.25, "learning_rate": 0.00019691663543684133, "loss": 3.0167, "step": 3660 }, { "epoch": 0.11177515149106901, "grad_norm": 32.5, "learning_rate": 0.00019689354321188616, "loss": 3.0321, "step": 3670 }, { "epoch": 0.11207971593654875, "grad_norm": 31.375, "learning_rate": 0.0001968703663554232, "loss": 3.0743, "step": 3680 }, { "epoch": 0.11238428038202851, "grad_norm": 32.0, "learning_rate": 0.000196847104890026, "loss": 3.0444, "step": 3690 }, { "epoch": 0.11268884482750827, "grad_norm": 32.75, "learning_rate": 0.00019682375883835042, "loss": 3.0413, "step": 3700 }, { "epoch": 0.11299340927298801, "grad_norm": 32.0, "learning_rate": 0.00019680032822313483, "loss": 3.0285, "step": 3710 }, { "epoch": 0.11329797371846777, "grad_norm": 31.5, "learning_rate": 0.00019677681306719987, "loss": 3.0417, "step": 3720 }, { "epoch": 0.11360253816394751, "grad_norm": 33.25, "learning_rate": 0.00019675321339344853, "loss": 3.0643, "step": 3730 }, { "epoch": 0.11390710260942727, "grad_norm": 33.75, "learning_rate": 0.00019672952922486609, "loss": 3.0784, "step": 3740 }, { "epoch": 0.11421166705490703, "grad_norm": 31.25, "learning_rate": 0.00019670576058452026, "loss": 3.0439, "step": 3750 }, { "epoch": 0.11451623150038677, "grad_norm": 33.75, "learning_rate": 0.00019668190749556086, "loss": 3.0699, "step": 3760 }, { "epoch": 0.11482079594586653, "grad_norm": 31.5, "learning_rate": 0.00019665796998122006, "loss": 3.0191, "step": 3770 }, { "epoch": 0.11512536039134627, "grad_norm": 32.75, "learning_rate": 0.0001966339480648122, "loss": 3.0296, "step": 3780 }, { "epoch": 0.11542992483682603, "grad_norm": 35.25, "learning_rate": 0.0001966098417697339, "loss": 3.0529, "step": 3790 }, { "epoch": 0.11573448928230579, "grad_norm": 36.25, "learning_rate": 0.00019658565111946386, "loss": 3.0297, "step": 3800 }, { "epoch": 0.11603905372778553, "grad_norm": 32.25, "learning_rate": 0.000196561376137563, "loss": 3.0335, "step": 3810 }, { "epoch": 0.11634361817326529, "grad_norm": 33.75, "learning_rate": 0.0001965370168476744, "loss": 3.0324, "step": 3820 }, { "epoch": 0.11664818261874504, "grad_norm": 30.875, "learning_rate": 0.0001965125732735232, "loss": 3.0594, "step": 3830 }, { "epoch": 0.11695274706422479, "grad_norm": 35.25, "learning_rate": 0.00019648804543891666, "loss": 3.0549, "step": 3840 }, { "epoch": 0.11725731150970455, "grad_norm": 36.25, "learning_rate": 0.00019646343336774408, "loss": 3.0312, "step": 3850 }, { "epoch": 0.11756187595518429, "grad_norm": 33.5, "learning_rate": 0.00019643873708397687, "loss": 3.0517, "step": 3860 }, { "epoch": 0.11786644040066405, "grad_norm": 32.25, "learning_rate": 0.00019641395661166837, "loss": 3.0396, "step": 3870 }, { "epoch": 0.1181710048461438, "grad_norm": 31.0, "learning_rate": 0.00019638909197495399, "loss": 3.0604, "step": 3880 }, { "epoch": 0.11847556929162355, "grad_norm": 31.5, "learning_rate": 0.00019636414319805105, "loss": 3.0399, "step": 3890 }, { "epoch": 0.1187801337371033, "grad_norm": 30.5, "learning_rate": 0.0001963391103052589, "loss": 3.0496, "step": 3900 }, { "epoch": 0.11908469818258305, "grad_norm": 32.0, "learning_rate": 0.00019631399332095873, "loss": 3.0315, "step": 3910 }, { "epoch": 0.1193892626280628, "grad_norm": 31.125, "learning_rate": 0.00019628879226961369, "loss": 3.0141, "step": 3920 }, { "epoch": 0.11969382707354256, "grad_norm": 31.25, "learning_rate": 0.00019626350717576876, "loss": 3.0221, "step": 3930 }, { "epoch": 0.11999839151902231, "grad_norm": 31.5, "learning_rate": 0.00019623813806405083, "loss": 3.0005, "step": 3940 }, { "epoch": 0.12030295596450206, "grad_norm": 32.75, "learning_rate": 0.00019621268495916865, "loss": 3.0391, "step": 3950 }, { "epoch": 0.12060752040998181, "grad_norm": 34.25, "learning_rate": 0.00019618714788591262, "loss": 3.0575, "step": 3960 }, { "epoch": 0.12091208485546157, "grad_norm": 34.25, "learning_rate": 0.0001961615268691551, "loss": 3.0304, "step": 3970 }, { "epoch": 0.12121664930094132, "grad_norm": 31.875, "learning_rate": 0.00019613582193385014, "loss": 3.0532, "step": 3980 }, { "epoch": 0.12152121374642107, "grad_norm": 32.75, "learning_rate": 0.00019611003310503344, "loss": 3.0559, "step": 3990 }, { "epoch": 0.12182577819190082, "grad_norm": 32.5, "learning_rate": 0.00019608416040782257, "loss": 3.0319, "step": 4000 }, { "epoch": 0.12213034263738057, "grad_norm": 31.0, "learning_rate": 0.0001960582038674166, "loss": 3.0437, "step": 4010 }, { "epoch": 0.12243490708286033, "grad_norm": 33.5, "learning_rate": 0.00019603216350909654, "loss": 3.0228, "step": 4020 }, { "epoch": 0.12273947152834008, "grad_norm": 31.875, "learning_rate": 0.00019600603935822467, "loss": 3.0669, "step": 4030 }, { "epoch": 0.12304403597381983, "grad_norm": 34.25, "learning_rate": 0.0001959798314402452, "loss": 3.0094, "step": 4040 }, { "epoch": 0.12334860041929958, "grad_norm": 35.0, "learning_rate": 0.0001959535397806837, "loss": 3.0271, "step": 4050 }, { "epoch": 0.12365316486477934, "grad_norm": 32.5, "learning_rate": 0.0001959271644051475, "loss": 3.0345, "step": 4060 }, { "epoch": 0.12395772931025908, "grad_norm": 33.25, "learning_rate": 0.00019590070533932524, "loss": 3.0281, "step": 4070 }, { "epoch": 0.12426229375573884, "grad_norm": 32.5, "learning_rate": 0.00019587416260898733, "loss": 3.0209, "step": 4080 }, { "epoch": 0.12456685820121859, "grad_norm": 32.75, "learning_rate": 0.00019584753623998544, "loss": 3.0444, "step": 4090 }, { "epoch": 0.12487142264669834, "grad_norm": 32.25, "learning_rate": 0.00019582082625825282, "loss": 3.0576, "step": 4100 }, { "epoch": 0.1251759870921781, "grad_norm": 32.5, "learning_rate": 0.00019579403268980415, "loss": 3.0391, "step": 4110 }, { "epoch": 0.12548055153765786, "grad_norm": 30.125, "learning_rate": 0.00019576715556073545, "loss": 3.0036, "step": 4120 }, { "epoch": 0.1257851159831376, "grad_norm": 31.75, "learning_rate": 0.00019574019489722427, "loss": 3.0414, "step": 4130 }, { "epoch": 0.12608968042861735, "grad_norm": 32.25, "learning_rate": 0.00019571315072552934, "loss": 3.0004, "step": 4140 }, { "epoch": 0.12639424487409712, "grad_norm": 32.25, "learning_rate": 0.00019568602307199083, "loss": 3.0004, "step": 4150 }, { "epoch": 0.12669880931957686, "grad_norm": 32.5, "learning_rate": 0.00019565881196303023, "loss": 3.0523, "step": 4160 }, { "epoch": 0.1270033737650566, "grad_norm": 30.75, "learning_rate": 0.00019563151742515024, "loss": 3.0247, "step": 4170 }, { "epoch": 0.12730793821053635, "grad_norm": 33.5, "learning_rate": 0.0001956041394849349, "loss": 3.0286, "step": 4180 }, { "epoch": 0.12761250265601612, "grad_norm": 36.5, "learning_rate": 0.0001955766781690494, "loss": 3.0018, "step": 4190 }, { "epoch": 0.12791706710149586, "grad_norm": 34.5, "learning_rate": 0.0001955491335042402, "loss": 3.0041, "step": 4200 }, { "epoch": 0.1282216315469756, "grad_norm": 33.75, "learning_rate": 0.00019552150551733496, "loss": 3.0455, "step": 4210 }, { "epoch": 0.12852619599245538, "grad_norm": 33.0, "learning_rate": 0.00019549379423524233, "loss": 3.047, "step": 4220 }, { "epoch": 0.12883076043793512, "grad_norm": 32.5, "learning_rate": 0.00019546599968495233, "loss": 3.044, "step": 4230 }, { "epoch": 0.12913532488341486, "grad_norm": 35.75, "learning_rate": 0.00019543812189353585, "loss": 2.9689, "step": 4240 }, { "epoch": 0.12943988932889464, "grad_norm": 32.0, "learning_rate": 0.00019541016088814504, "loss": 3.0118, "step": 4250 }, { "epoch": 0.12974445377437438, "grad_norm": 31.875, "learning_rate": 0.00019538211669601298, "loss": 3.021, "step": 4260 }, { "epoch": 0.13004901821985412, "grad_norm": 33.25, "learning_rate": 0.00019535398934445383, "loss": 2.963, "step": 4270 }, { "epoch": 0.13035358266533387, "grad_norm": 33.75, "learning_rate": 0.00019532577886086268, "loss": 3.0508, "step": 4280 }, { "epoch": 0.13065814711081364, "grad_norm": 32.0, "learning_rate": 0.00019529748527271569, "loss": 2.9963, "step": 4290 }, { "epoch": 0.13096271155629338, "grad_norm": 33.75, "learning_rate": 0.00019526910860756983, "loss": 3.03, "step": 4300 }, { "epoch": 0.13126727600177313, "grad_norm": 32.25, "learning_rate": 0.00019524064889306312, "loss": 3.0476, "step": 4310 }, { "epoch": 0.1315718404472529, "grad_norm": 30.5, "learning_rate": 0.0001952121061569144, "loss": 3.0049, "step": 4320 }, { "epoch": 0.13187640489273264, "grad_norm": 31.625, "learning_rate": 0.00019518348042692335, "loss": 3.0274, "step": 4330 }, { "epoch": 0.13218096933821238, "grad_norm": 31.5, "learning_rate": 0.00019515477173097048, "loss": 3.0542, "step": 4340 }, { "epoch": 0.13248553378369216, "grad_norm": 30.125, "learning_rate": 0.0001951259800970172, "loss": 3.016, "step": 4350 }, { "epoch": 0.1327900982291719, "grad_norm": 34.75, "learning_rate": 0.00019509710555310557, "loss": 3.0243, "step": 4360 }, { "epoch": 0.13309466267465164, "grad_norm": 32.25, "learning_rate": 0.0001950681481273585, "loss": 3.0205, "step": 4370 }, { "epoch": 0.13339922712013141, "grad_norm": 32.75, "learning_rate": 0.00019503910784797958, "loss": 3.0043, "step": 4380 }, { "epoch": 0.13370379156561116, "grad_norm": 33.0, "learning_rate": 0.00019500998474325312, "loss": 3.0291, "step": 4390 }, { "epoch": 0.1340083560110909, "grad_norm": 32.75, "learning_rate": 0.00019498077884154405, "loss": 3.0103, "step": 4400 }, { "epoch": 0.13431292045657064, "grad_norm": 30.5, "learning_rate": 0.000194951490171298, "loss": 3.0377, "step": 4410 }, { "epoch": 0.13461748490205042, "grad_norm": 32.25, "learning_rate": 0.00019492211876104122, "loss": 3.0271, "step": 4420 }, { "epoch": 0.13492204934753016, "grad_norm": 31.625, "learning_rate": 0.00019489266463938048, "loss": 3.0186, "step": 4430 }, { "epoch": 0.1352266137930099, "grad_norm": 33.25, "learning_rate": 0.00019486312783500312, "loss": 3.0235, "step": 4440 }, { "epoch": 0.13553117823848967, "grad_norm": 30.875, "learning_rate": 0.0001948335083766771, "loss": 3.0145, "step": 4450 }, { "epoch": 0.13583574268396942, "grad_norm": 31.875, "learning_rate": 0.0001948038062932507, "loss": 3.0218, "step": 4460 }, { "epoch": 0.13614030712944916, "grad_norm": 33.0, "learning_rate": 0.0001947740216136529, "loss": 3.0052, "step": 4470 }, { "epoch": 0.13644487157492893, "grad_norm": 30.75, "learning_rate": 0.00019474415436689305, "loss": 3.0256, "step": 4480 }, { "epoch": 0.13674943602040868, "grad_norm": 32.25, "learning_rate": 0.00019471420458206078, "loss": 3.0413, "step": 4490 }, { "epoch": 0.13705400046588842, "grad_norm": 31.5, "learning_rate": 0.00019468417228832625, "loss": 3.0283, "step": 4500 }, { "epoch": 0.13735856491136816, "grad_norm": 31.0, "learning_rate": 0.00019465405751493996, "loss": 3.0019, "step": 4510 }, { "epoch": 0.13766312935684794, "grad_norm": 31.875, "learning_rate": 0.00019462386029123273, "loss": 3.0169, "step": 4520 }, { "epoch": 0.13796769380232768, "grad_norm": 32.0, "learning_rate": 0.00019459358064661563, "loss": 3.0353, "step": 4530 }, { "epoch": 0.13827225824780742, "grad_norm": 31.75, "learning_rate": 0.00019456321861058013, "loss": 2.9747, "step": 4540 }, { "epoch": 0.1385768226932872, "grad_norm": 32.25, "learning_rate": 0.00019453277421269783, "loss": 3.0499, "step": 4550 }, { "epoch": 0.13888138713876694, "grad_norm": 33.25, "learning_rate": 0.00019450224748262062, "loss": 2.9975, "step": 4560 }, { "epoch": 0.13918595158424668, "grad_norm": 30.875, "learning_rate": 0.00019447163845008053, "loss": 3.0046, "step": 4570 }, { "epoch": 0.13949051602972645, "grad_norm": 32.75, "learning_rate": 0.00019444094714488977, "loss": 3.0011, "step": 4580 }, { "epoch": 0.1397950804752062, "grad_norm": 32.25, "learning_rate": 0.00019441017359694067, "loss": 3.0309, "step": 4590 }, { "epoch": 0.14009964492068594, "grad_norm": 31.0, "learning_rate": 0.00019437931783620565, "loss": 3.0251, "step": 4600 }, { "epoch": 0.1404042093661657, "grad_norm": 32.5, "learning_rate": 0.00019434837989273733, "loss": 3.0136, "step": 4610 }, { "epoch": 0.14070877381164545, "grad_norm": 31.75, "learning_rate": 0.00019431735979666816, "loss": 3.0024, "step": 4620 }, { "epoch": 0.1410133382571252, "grad_norm": 31.75, "learning_rate": 0.0001942862575782108, "loss": 2.987, "step": 4630 }, { "epoch": 0.14131790270260494, "grad_norm": 33.25, "learning_rate": 0.00019425507326765773, "loss": 3.0347, "step": 4640 }, { "epoch": 0.1416224671480847, "grad_norm": 34.0, "learning_rate": 0.0001942238068953815, "loss": 3.0329, "step": 4650 }, { "epoch": 0.14192703159356446, "grad_norm": 30.75, "learning_rate": 0.00019419245849183452, "loss": 3.016, "step": 4660 }, { "epoch": 0.1422315960390442, "grad_norm": 34.0, "learning_rate": 0.00019416102808754917, "loss": 2.9799, "step": 4670 }, { "epoch": 0.14253616048452397, "grad_norm": 33.0, "learning_rate": 0.0001941295157131376, "loss": 3.0182, "step": 4680 }, { "epoch": 0.14284072493000372, "grad_norm": 31.125, "learning_rate": 0.00019409792139929192, "loss": 2.9854, "step": 4690 }, { "epoch": 0.14314528937548346, "grad_norm": 31.25, "learning_rate": 0.00019406624517678389, "loss": 3.0232, "step": 4700 }, { "epoch": 0.14344985382096323, "grad_norm": 33.5, "learning_rate": 0.00019403448707646517, "loss": 2.9764, "step": 4710 }, { "epoch": 0.14375441826644297, "grad_norm": 31.5, "learning_rate": 0.00019400264712926711, "loss": 2.9862, "step": 4720 }, { "epoch": 0.14405898271192272, "grad_norm": 31.625, "learning_rate": 0.0001939707253662008, "loss": 3.0002, "step": 4730 }, { "epoch": 0.14436354715740246, "grad_norm": 31.125, "learning_rate": 0.000193938721818357, "loss": 3.0007, "step": 4740 }, { "epoch": 0.14466811160288223, "grad_norm": 32.5, "learning_rate": 0.00019390663651690614, "loss": 3.0324, "step": 4750 }, { "epoch": 0.14497267604836198, "grad_norm": 30.625, "learning_rate": 0.00019387446949309828, "loss": 3.0114, "step": 4760 }, { "epoch": 0.14527724049384172, "grad_norm": 32.25, "learning_rate": 0.00019384222077826305, "loss": 3.0143, "step": 4770 }, { "epoch": 0.1455818049393215, "grad_norm": 31.125, "learning_rate": 0.00019380989040380966, "loss": 2.9915, "step": 4780 }, { "epoch": 0.14588636938480123, "grad_norm": 31.375, "learning_rate": 0.00019377747840122685, "loss": 3.0152, "step": 4790 }, { "epoch": 0.14619093383028098, "grad_norm": 32.25, "learning_rate": 0.00019374498480208288, "loss": 3.0002, "step": 4800 }, { "epoch": 0.14649549827576075, "grad_norm": 31.0, "learning_rate": 0.00019371240963802542, "loss": 3.0409, "step": 4810 }, { "epoch": 0.1468000627212405, "grad_norm": 34.25, "learning_rate": 0.00019367975294078168, "loss": 3.0128, "step": 4820 }, { "epoch": 0.14710462716672024, "grad_norm": 33.0, "learning_rate": 0.0001936470147421582, "loss": 3.0081, "step": 4830 }, { "epoch": 0.1474091916122, "grad_norm": 30.625, "learning_rate": 0.00019361419507404095, "loss": 3.0062, "step": 4840 }, { "epoch": 0.14771375605767975, "grad_norm": 32.0, "learning_rate": 0.00019358129396839524, "loss": 2.9668, "step": 4850 }, { "epoch": 0.1480183205031595, "grad_norm": 34.0, "learning_rate": 0.00019354831145726564, "loss": 2.9909, "step": 4860 }, { "epoch": 0.14832288494863924, "grad_norm": 30.625, "learning_rate": 0.0001935152475727761, "loss": 3.0341, "step": 4870 }, { "epoch": 0.148627449394119, "grad_norm": 31.75, "learning_rate": 0.00019348210234712972, "loss": 3.0011, "step": 4880 }, { "epoch": 0.14893201383959875, "grad_norm": 31.875, "learning_rate": 0.00019344887581260894, "loss": 3.0007, "step": 4890 }, { "epoch": 0.1492365782850785, "grad_norm": 33.0, "learning_rate": 0.00019341556800157528, "loss": 3.0298, "step": 4900 }, { "epoch": 0.14954114273055827, "grad_norm": 37.0, "learning_rate": 0.0001933821789464695, "loss": 2.9966, "step": 4910 }, { "epoch": 0.149845707176038, "grad_norm": 31.625, "learning_rate": 0.00019334870867981148, "loss": 2.9991, "step": 4920 }, { "epoch": 0.15015027162151776, "grad_norm": 33.25, "learning_rate": 0.00019331515723420016, "loss": 2.9814, "step": 4930 }, { "epoch": 0.15045483606699753, "grad_norm": 32.0, "learning_rate": 0.00019328152464231354, "loss": 2.9937, "step": 4940 }, { "epoch": 0.15075940051247727, "grad_norm": 31.625, "learning_rate": 0.00019324781093690873, "loss": 3.0062, "step": 4950 }, { "epoch": 0.15106396495795701, "grad_norm": 32.25, "learning_rate": 0.00019321401615082177, "loss": 2.9901, "step": 4960 }, { "epoch": 0.15136852940343679, "grad_norm": 33.5, "learning_rate": 0.0001931801403169677, "loss": 3.0008, "step": 4970 }, { "epoch": 0.15167309384891653, "grad_norm": 30.75, "learning_rate": 0.00019314618346834045, "loss": 2.9803, "step": 4980 }, { "epoch": 0.15197765829439627, "grad_norm": 31.625, "learning_rate": 0.00019311214563801294, "loss": 3.0194, "step": 4990 }, { "epoch": 0.15228222273987602, "grad_norm": 32.25, "learning_rate": 0.0001930780268591369, "loss": 2.9631, "step": 5000 }, { "epoch": 0.1525867871853558, "grad_norm": 31.0, "learning_rate": 0.00019304382716494293, "loss": 2.9877, "step": 5010 }, { "epoch": 0.15289135163083553, "grad_norm": 31.375, "learning_rate": 0.0001930095465887404, "loss": 3.0177, "step": 5020 }, { "epoch": 0.15319591607631527, "grad_norm": 36.5, "learning_rate": 0.00019297518516391755, "loss": 3.0044, "step": 5030 }, { "epoch": 0.15350048052179505, "grad_norm": 31.875, "learning_rate": 0.0001929407429239412, "loss": 3.0146, "step": 5040 }, { "epoch": 0.1538050449672748, "grad_norm": 31.5, "learning_rate": 0.00019290621990235708, "loss": 2.9792, "step": 5050 }, { "epoch": 0.15410960941275453, "grad_norm": 31.625, "learning_rate": 0.00019287161613278943, "loss": 3.0115, "step": 5060 }, { "epoch": 0.1544141738582343, "grad_norm": 31.875, "learning_rate": 0.00019283693164894123, "loss": 3.0499, "step": 5070 }, { "epoch": 0.15471873830371405, "grad_norm": 32.5, "learning_rate": 0.00019280216648459407, "loss": 3.0157, "step": 5080 }, { "epoch": 0.1550233027491938, "grad_norm": 32.25, "learning_rate": 0.00019276732067360808, "loss": 2.9696, "step": 5090 }, { "epoch": 0.15532786719467354, "grad_norm": 32.5, "learning_rate": 0.00019273239424992194, "loss": 2.9749, "step": 5100 }, { "epoch": 0.1556324316401533, "grad_norm": 31.625, "learning_rate": 0.00019269738724755287, "loss": 2.9894, "step": 5110 }, { "epoch": 0.15593699608563305, "grad_norm": 31.75, "learning_rate": 0.00019266229970059655, "loss": 3.0214, "step": 5120 }, { "epoch": 0.1562415605311128, "grad_norm": 31.125, "learning_rate": 0.00019262713164322715, "loss": 3.0344, "step": 5130 }, { "epoch": 0.15654612497659257, "grad_norm": 29.5, "learning_rate": 0.00019259188310969723, "loss": 2.9795, "step": 5140 }, { "epoch": 0.1568506894220723, "grad_norm": 30.5, "learning_rate": 0.00019255655413433768, "loss": 3.0007, "step": 5150 }, { "epoch": 0.15715525386755205, "grad_norm": 32.0, "learning_rate": 0.0001925211447515578, "loss": 2.994, "step": 5160 }, { "epoch": 0.15745981831303182, "grad_norm": 29.75, "learning_rate": 0.00019248565499584518, "loss": 2.9505, "step": 5170 }, { "epoch": 0.15776438275851157, "grad_norm": 30.625, "learning_rate": 0.0001924500849017657, "loss": 2.9909, "step": 5180 }, { "epoch": 0.1580689472039913, "grad_norm": 32.75, "learning_rate": 0.00019241443450396353, "loss": 2.9822, "step": 5190 }, { "epoch": 0.15837351164947108, "grad_norm": 33.25, "learning_rate": 0.0001923787038371609, "loss": 3.0057, "step": 5200 }, { "epoch": 0.15867807609495083, "grad_norm": 31.125, "learning_rate": 0.00019234289293615847, "loss": 3.0189, "step": 5210 }, { "epoch": 0.15898264054043057, "grad_norm": 32.0, "learning_rate": 0.00019230700183583478, "loss": 2.9555, "step": 5220 }, { "epoch": 0.1592872049859103, "grad_norm": 31.125, "learning_rate": 0.00019227103057114665, "loss": 2.9852, "step": 5230 }, { "epoch": 0.15959176943139008, "grad_norm": 31.875, "learning_rate": 0.00019223497917712898, "loss": 3.0143, "step": 5240 }, { "epoch": 0.15989633387686983, "grad_norm": 32.0, "learning_rate": 0.0001921988476888946, "loss": 3.0024, "step": 5250 }, { "epoch": 0.16020089832234957, "grad_norm": 33.5, "learning_rate": 0.00019216263614163439, "loss": 3.0136, "step": 5260 }, { "epoch": 0.16050546276782934, "grad_norm": 32.25, "learning_rate": 0.0001921263445706173, "loss": 3.0112, "step": 5270 }, { "epoch": 0.1608100272133091, "grad_norm": 30.25, "learning_rate": 0.00019208997301119012, "loss": 2.998, "step": 5280 }, { "epoch": 0.16111459165878883, "grad_norm": 31.125, "learning_rate": 0.00019205352149877756, "loss": 2.9792, "step": 5290 }, { "epoch": 0.1614191561042686, "grad_norm": 32.0, "learning_rate": 0.00019201699006888222, "loss": 2.9628, "step": 5300 }, { "epoch": 0.16172372054974835, "grad_norm": 32.5, "learning_rate": 0.0001919803787570846, "loss": 2.9851, "step": 5310 }, { "epoch": 0.1620282849952281, "grad_norm": 34.75, "learning_rate": 0.00019194368759904284, "loss": 2.9862, "step": 5320 }, { "epoch": 0.16233284944070783, "grad_norm": 32.25, "learning_rate": 0.000191906916630493, "loss": 2.9901, "step": 5330 }, { "epoch": 0.1626374138861876, "grad_norm": 32.75, "learning_rate": 0.00019187006588724877, "loss": 2.9668, "step": 5340 }, { "epoch": 0.16294197833166735, "grad_norm": 32.5, "learning_rate": 0.00019183313540520168, "loss": 2.9988, "step": 5350 }, { "epoch": 0.1632465427771471, "grad_norm": 32.25, "learning_rate": 0.0001917961252203207, "loss": 2.9761, "step": 5360 }, { "epoch": 0.16355110722262686, "grad_norm": 31.5, "learning_rate": 0.00019175903536865267, "loss": 2.9789, "step": 5370 }, { "epoch": 0.1638556716681066, "grad_norm": 31.75, "learning_rate": 0.00019172186588632185, "loss": 3.0041, "step": 5380 }, { "epoch": 0.16416023611358635, "grad_norm": 29.625, "learning_rate": 0.0001916846168095301, "loss": 2.9869, "step": 5390 }, { "epoch": 0.16446480055906612, "grad_norm": 31.625, "learning_rate": 0.0001916472881745569, "loss": 2.9683, "step": 5400 }, { "epoch": 0.16476936500454586, "grad_norm": 30.25, "learning_rate": 0.00019160988001775902, "loss": 2.9751, "step": 5410 }, { "epoch": 0.1650739294500256, "grad_norm": 31.0, "learning_rate": 0.0001915723923755709, "loss": 2.987, "step": 5420 }, { "epoch": 0.16537849389550538, "grad_norm": 31.25, "learning_rate": 0.0001915348252845043, "loss": 2.9775, "step": 5430 }, { "epoch": 0.16568305834098512, "grad_norm": 32.75, "learning_rate": 0.00019149717878114822, "loss": 2.973, "step": 5440 }, { "epoch": 0.16598762278646487, "grad_norm": 31.375, "learning_rate": 0.00019145945290216927, "loss": 2.9704, "step": 5450 }, { "epoch": 0.1662921872319446, "grad_norm": 32.5, "learning_rate": 0.0001914216476843112, "loss": 2.9788, "step": 5460 }, { "epoch": 0.16659675167742438, "grad_norm": 30.875, "learning_rate": 0.0001913837631643951, "loss": 2.9749, "step": 5470 }, { "epoch": 0.16690131612290413, "grad_norm": 33.25, "learning_rate": 0.00019134579937931919, "loss": 3.0028, "step": 5480 }, { "epoch": 0.16720588056838387, "grad_norm": 31.25, "learning_rate": 0.00019130775636605905, "loss": 2.98, "step": 5490 }, { "epoch": 0.16751044501386364, "grad_norm": 32.25, "learning_rate": 0.0001912696341616673, "loss": 2.9639, "step": 5500 }, { "epoch": 0.16781500945934338, "grad_norm": 32.5, "learning_rate": 0.00019123143280327378, "loss": 2.9897, "step": 5510 }, { "epoch": 0.16811957390482313, "grad_norm": 30.625, "learning_rate": 0.00019119315232808538, "loss": 3.0083, "step": 5520 }, { "epoch": 0.1684241383503029, "grad_norm": 30.125, "learning_rate": 0.000191154792773386, "loss": 2.9791, "step": 5530 }, { "epoch": 0.16872870279578264, "grad_norm": 30.5, "learning_rate": 0.00019111635417653661, "loss": 2.959, "step": 5540 }, { "epoch": 0.16903326724126239, "grad_norm": 31.5, "learning_rate": 0.00019107783657497523, "loss": 2.9657, "step": 5550 }, { "epoch": 0.16933783168674213, "grad_norm": 31.375, "learning_rate": 0.0001910392400062167, "loss": 2.9734, "step": 5560 }, { "epoch": 0.1696423961322219, "grad_norm": 30.75, "learning_rate": 0.00019100056450785285, "loss": 2.9743, "step": 5570 }, { "epoch": 0.16994696057770164, "grad_norm": 32.0, "learning_rate": 0.00019096181011755238, "loss": 2.9587, "step": 5580 }, { "epoch": 0.1702515250231814, "grad_norm": 31.625, "learning_rate": 0.00019092297687306082, "loss": 3.009, "step": 5590 }, { "epoch": 0.17055608946866116, "grad_norm": 32.25, "learning_rate": 0.00019088406481220044, "loss": 2.9205, "step": 5600 }, { "epoch": 0.1708606539141409, "grad_norm": 29.875, "learning_rate": 0.00019084507397287042, "loss": 2.9857, "step": 5610 }, { "epoch": 0.17116521835962065, "grad_norm": 32.75, "learning_rate": 0.00019080600439304654, "loss": 2.9933, "step": 5620 }, { "epoch": 0.17146978280510042, "grad_norm": 30.625, "learning_rate": 0.00019076685611078123, "loss": 2.9659, "step": 5630 }, { "epoch": 0.17177434725058016, "grad_norm": 33.25, "learning_rate": 0.00019072762916420383, "loss": 2.969, "step": 5640 }, { "epoch": 0.1720789116960599, "grad_norm": 31.5, "learning_rate": 0.00019068832359151998, "loss": 2.9692, "step": 5650 }, { "epoch": 0.17238347614153968, "grad_norm": 31.25, "learning_rate": 0.00019064893943101206, "loss": 2.953, "step": 5660 }, { "epoch": 0.17268804058701942, "grad_norm": 31.5, "learning_rate": 0.000190609476721039, "loss": 2.9764, "step": 5670 }, { "epoch": 0.17299260503249916, "grad_norm": 31.5, "learning_rate": 0.0001905699355000362, "loss": 2.9467, "step": 5680 }, { "epoch": 0.1732971694779789, "grad_norm": 33.0, "learning_rate": 0.00019053031580651555, "loss": 2.9835, "step": 5690 }, { "epoch": 0.17360173392345868, "grad_norm": 31.875, "learning_rate": 0.0001904906176790653, "loss": 2.9753, "step": 5700 }, { "epoch": 0.17390629836893842, "grad_norm": 32.75, "learning_rate": 0.00019045084115635022, "loss": 2.9805, "step": 5710 }, { "epoch": 0.17421086281441817, "grad_norm": 32.5, "learning_rate": 0.00019041098627711127, "loss": 2.9972, "step": 5720 }, { "epoch": 0.17451542725989794, "grad_norm": 32.75, "learning_rate": 0.00019037105308016588, "loss": 2.9896, "step": 5730 }, { "epoch": 0.17481999170537768, "grad_norm": 32.75, "learning_rate": 0.00019033104160440774, "loss": 2.9859, "step": 5740 }, { "epoch": 0.17512455615085742, "grad_norm": 42.5, "learning_rate": 0.00019029095188880662, "loss": 2.9704, "step": 5750 }, { "epoch": 0.1754291205963372, "grad_norm": 32.0, "learning_rate": 0.0001902507839724087, "loss": 2.9833, "step": 5760 }, { "epoch": 0.17573368504181694, "grad_norm": 32.75, "learning_rate": 0.00019021053789433618, "loss": 2.9906, "step": 5770 }, { "epoch": 0.17603824948729668, "grad_norm": 30.625, "learning_rate": 0.00019017021369378747, "loss": 2.9423, "step": 5780 }, { "epoch": 0.17634281393277643, "grad_norm": 31.375, "learning_rate": 0.00019012981141003704, "loss": 2.9527, "step": 5790 }, { "epoch": 0.1766473783782562, "grad_norm": 30.625, "learning_rate": 0.0001900893310824354, "loss": 2.9817, "step": 5800 }, { "epoch": 0.17695194282373594, "grad_norm": 31.625, "learning_rate": 0.0001900487727504091, "loss": 2.9785, "step": 5810 }, { "epoch": 0.17725650726921569, "grad_norm": 31.75, "learning_rate": 0.00019000813645346062, "loss": 2.9489, "step": 5820 }, { "epoch": 0.17756107171469546, "grad_norm": 33.0, "learning_rate": 0.0001899674222311684, "loss": 2.9547, "step": 5830 }, { "epoch": 0.1778656361601752, "grad_norm": 32.25, "learning_rate": 0.00018992663012318685, "loss": 2.9898, "step": 5840 }, { "epoch": 0.17817020060565494, "grad_norm": 31.875, "learning_rate": 0.0001898857601692461, "loss": 2.9715, "step": 5850 }, { "epoch": 0.17847476505113472, "grad_norm": 31.625, "learning_rate": 0.00018984481240915215, "loss": 2.9539, "step": 5860 }, { "epoch": 0.17877932949661446, "grad_norm": 32.25, "learning_rate": 0.00018980378688278685, "loss": 2.9543, "step": 5870 }, { "epoch": 0.1790838939420942, "grad_norm": 30.125, "learning_rate": 0.0001897626836301077, "loss": 2.9741, "step": 5880 }, { "epoch": 0.17938845838757397, "grad_norm": 31.0, "learning_rate": 0.00018972150269114802, "loss": 2.9283, "step": 5890 }, { "epoch": 0.17969302283305372, "grad_norm": 31.125, "learning_rate": 0.00018968024410601658, "loss": 2.9666, "step": 5900 }, { "epoch": 0.17999758727853346, "grad_norm": 30.5, "learning_rate": 0.00018963890791489802, "loss": 3.0005, "step": 5910 }, { "epoch": 0.1803021517240132, "grad_norm": 31.375, "learning_rate": 0.0001895974941580524, "loss": 2.9489, "step": 5920 }, { "epoch": 0.18060671616949298, "grad_norm": 32.5, "learning_rate": 0.00018955600287581543, "loss": 2.9954, "step": 5930 }, { "epoch": 0.18091128061497272, "grad_norm": 31.5, "learning_rate": 0.00018951443410859827, "loss": 2.9726, "step": 5940 }, { "epoch": 0.18121584506045246, "grad_norm": 31.625, "learning_rate": 0.0001894727878968875, "loss": 2.977, "step": 5950 }, { "epoch": 0.18152040950593223, "grad_norm": 29.75, "learning_rate": 0.00018943106428124528, "loss": 2.9779, "step": 5960 }, { "epoch": 0.18182497395141198, "grad_norm": 31.0, "learning_rate": 0.000189389263302309, "loss": 2.9753, "step": 5970 }, { "epoch": 0.18212953839689172, "grad_norm": 32.25, "learning_rate": 0.00018934738500079148, "loss": 2.9472, "step": 5980 }, { "epoch": 0.1824341028423715, "grad_norm": 31.625, "learning_rate": 0.0001893054294174808, "loss": 2.9572, "step": 5990 }, { "epoch": 0.18273866728785124, "grad_norm": 31.0, "learning_rate": 0.0001892633965932404, "loss": 2.9673, "step": 6000 }, { "epoch": 0.18304323173333098, "grad_norm": 32.0, "learning_rate": 0.00018922128656900892, "loss": 2.9665, "step": 6010 }, { "epoch": 0.18334779617881075, "grad_norm": 33.0, "learning_rate": 0.00018917909938580009, "loss": 2.9329, "step": 6020 }, { "epoch": 0.1836523606242905, "grad_norm": 30.875, "learning_rate": 0.00018913683508470287, "loss": 2.9441, "step": 6030 }, { "epoch": 0.18395692506977024, "grad_norm": 31.875, "learning_rate": 0.0001890944937068814, "loss": 2.9859, "step": 6040 }, { "epoch": 0.18426148951524998, "grad_norm": 31.5, "learning_rate": 0.00018905207529357473, "loss": 2.9841, "step": 6050 }, { "epoch": 0.18456605396072975, "grad_norm": 31.875, "learning_rate": 0.0001890095798860971, "loss": 2.9773, "step": 6060 }, { "epoch": 0.1848706184062095, "grad_norm": 31.0, "learning_rate": 0.0001889670075258376, "loss": 2.9605, "step": 6070 }, { "epoch": 0.18517518285168924, "grad_norm": 32.5, "learning_rate": 0.00018892435825426036, "loss": 2.9774, "step": 6080 }, { "epoch": 0.185479747297169, "grad_norm": 30.75, "learning_rate": 0.00018888163211290443, "loss": 2.9371, "step": 6090 }, { "epoch": 0.18578431174264876, "grad_norm": 33.25, "learning_rate": 0.00018883882914338365, "loss": 2.9873, "step": 6100 }, { "epoch": 0.1860888761881285, "grad_norm": 32.75, "learning_rate": 0.00018879594938738675, "loss": 2.9686, "step": 6110 }, { "epoch": 0.18639344063360827, "grad_norm": 31.75, "learning_rate": 0.00018875299288667724, "loss": 3.0122, "step": 6120 }, { "epoch": 0.18669800507908801, "grad_norm": 34.25, "learning_rate": 0.0001887099596830933, "loss": 2.9741, "step": 6130 }, { "epoch": 0.18700256952456776, "grad_norm": 33.25, "learning_rate": 0.00018866684981854793, "loss": 2.9903, "step": 6140 }, { "epoch": 0.1873071339700475, "grad_norm": 30.25, "learning_rate": 0.00018862366333502882, "loss": 2.9549, "step": 6150 }, { "epoch": 0.18761169841552727, "grad_norm": 31.375, "learning_rate": 0.0001885804002745981, "loss": 3.0144, "step": 6160 }, { "epoch": 0.18791626286100702, "grad_norm": 31.75, "learning_rate": 0.0001885370606793926, "loss": 2.9554, "step": 6170 }, { "epoch": 0.18822082730648676, "grad_norm": 30.875, "learning_rate": 0.00018849364459162376, "loss": 2.9857, "step": 6180 }, { "epoch": 0.18852539175196653, "grad_norm": 32.5, "learning_rate": 0.0001884501520535774, "loss": 2.9856, "step": 6190 }, { "epoch": 0.18882995619744627, "grad_norm": 31.75, "learning_rate": 0.00018840658310761388, "loss": 2.9944, "step": 6200 }, { "epoch": 0.18913452064292602, "grad_norm": 32.5, "learning_rate": 0.00018836293779616792, "loss": 2.961, "step": 6210 }, { "epoch": 0.1894390850884058, "grad_norm": 33.0, "learning_rate": 0.00018831921616174863, "loss": 2.9724, "step": 6220 }, { "epoch": 0.18974364953388553, "grad_norm": 31.625, "learning_rate": 0.0001882754182469395, "loss": 2.9659, "step": 6230 }, { "epoch": 0.19004821397936528, "grad_norm": 31.0, "learning_rate": 0.0001882315440943983, "loss": 2.9597, "step": 6240 }, { "epoch": 0.19035277842484505, "grad_norm": 32.5, "learning_rate": 0.00018818759374685703, "loss": 2.9425, "step": 6250 }, { "epoch": 0.1906573428703248, "grad_norm": 31.875, "learning_rate": 0.00018814356724712188, "loss": 2.9622, "step": 6260 }, { "epoch": 0.19096190731580454, "grad_norm": 31.25, "learning_rate": 0.0001880994646380733, "loss": 2.9708, "step": 6270 }, { "epoch": 0.19126647176128428, "grad_norm": 35.75, "learning_rate": 0.00018805528596266573, "loss": 2.942, "step": 6280 }, { "epoch": 0.19157103620676405, "grad_norm": 32.5, "learning_rate": 0.0001880110312639278, "loss": 2.9626, "step": 6290 }, { "epoch": 0.1918756006522438, "grad_norm": 33.25, "learning_rate": 0.0001879667005849622, "loss": 2.9873, "step": 6300 }, { "epoch": 0.19218016509772354, "grad_norm": 28.875, "learning_rate": 0.00018792229396894555, "loss": 2.9749, "step": 6310 }, { "epoch": 0.1924847295432033, "grad_norm": 32.25, "learning_rate": 0.00018787781145912847, "loss": 2.9433, "step": 6320 }, { "epoch": 0.19278929398868305, "grad_norm": 31.0, "learning_rate": 0.00018783325309883547, "loss": 2.9411, "step": 6330 }, { "epoch": 0.1930938584341628, "grad_norm": 31.625, "learning_rate": 0.00018778861893146498, "loss": 2.9368, "step": 6340 }, { "epoch": 0.19339842287964257, "grad_norm": 30.75, "learning_rate": 0.00018774390900048923, "loss": 2.9729, "step": 6350 }, { "epoch": 0.1937029873251223, "grad_norm": 32.25, "learning_rate": 0.00018769912334945426, "loss": 2.9487, "step": 6360 }, { "epoch": 0.19400755177060205, "grad_norm": 33.25, "learning_rate": 0.0001876542620219798, "loss": 2.9483, "step": 6370 }, { "epoch": 0.1943121162160818, "grad_norm": 30.25, "learning_rate": 0.00018760932506175942, "loss": 2.9493, "step": 6380 }, { "epoch": 0.19461668066156157, "grad_norm": 29.5, "learning_rate": 0.00018756431251256022, "loss": 2.9736, "step": 6390 }, { "epoch": 0.1949212451070413, "grad_norm": 29.625, "learning_rate": 0.000187519224418223, "loss": 2.9617, "step": 6400 }, { "epoch": 0.19522580955252106, "grad_norm": 31.25, "learning_rate": 0.00018747406082266204, "loss": 2.972, "step": 6410 }, { "epoch": 0.19553037399800083, "grad_norm": 30.625, "learning_rate": 0.00018742882176986524, "loss": 2.9362, "step": 6420 }, { "epoch": 0.19583493844348057, "grad_norm": 29.375, "learning_rate": 0.00018738350730389407, "loss": 2.9484, "step": 6430 }, { "epoch": 0.19613950288896032, "grad_norm": 31.875, "learning_rate": 0.00018733811746888328, "loss": 2.978, "step": 6440 }, { "epoch": 0.1964440673344401, "grad_norm": 31.0, "learning_rate": 0.00018729265230904113, "loss": 2.9438, "step": 6450 }, { "epoch": 0.19674863177991983, "grad_norm": 32.0, "learning_rate": 0.00018724711186864917, "loss": 2.968, "step": 6460 }, { "epoch": 0.19705319622539957, "grad_norm": 30.75, "learning_rate": 0.00018720149619206239, "loss": 2.9474, "step": 6470 }, { "epoch": 0.19735776067087935, "grad_norm": 32.25, "learning_rate": 0.00018715580532370894, "loss": 2.96, "step": 6480 }, { "epoch": 0.1976623251163591, "grad_norm": 30.5, "learning_rate": 0.00018711003930809027, "loss": 2.9603, "step": 6490 }, { "epoch": 0.19796688956183883, "grad_norm": 31.125, "learning_rate": 0.00018706419818978102, "loss": 2.9386, "step": 6500 }, { "epoch": 0.19827145400731858, "grad_norm": 30.375, "learning_rate": 0.0001870182820134289, "loss": 2.988, "step": 6510 }, { "epoch": 0.19857601845279835, "grad_norm": 32.0, "learning_rate": 0.00018697229082375487, "loss": 2.9747, "step": 6520 }, { "epoch": 0.1988805828982781, "grad_norm": 31.375, "learning_rate": 0.00018692622466555278, "loss": 2.9454, "step": 6530 }, { "epoch": 0.19918514734375783, "grad_norm": 30.625, "learning_rate": 0.00018688008358368964, "loss": 2.934, "step": 6540 }, { "epoch": 0.1994897117892376, "grad_norm": 32.25, "learning_rate": 0.00018683386762310538, "loss": 2.985, "step": 6550 }, { "epoch": 0.19979427623471735, "grad_norm": 31.25, "learning_rate": 0.0001867875768288128, "loss": 2.9314, "step": 6560 }, { "epoch": 0.2000988406801971, "grad_norm": 31.5, "learning_rate": 0.0001867412112458977, "loss": 2.9576, "step": 6570 }, { "epoch": 0.20040340512567686, "grad_norm": 32.75, "learning_rate": 0.00018669477091951862, "loss": 2.9541, "step": 6580 }, { "epoch": 0.2007079695711566, "grad_norm": 32.75, "learning_rate": 0.00018664825589490697, "loss": 2.9419, "step": 6590 }, { "epoch": 0.20101253401663635, "grad_norm": 30.5, "learning_rate": 0.00018660166621736686, "loss": 2.9516, "step": 6600 }, { "epoch": 0.2013170984621161, "grad_norm": 30.25, "learning_rate": 0.00018655500193227512, "loss": 2.97, "step": 6610 }, { "epoch": 0.20162166290759587, "grad_norm": 31.0, "learning_rate": 0.0001865082630850813, "loss": 2.934, "step": 6620 }, { "epoch": 0.2019262273530756, "grad_norm": 32.75, "learning_rate": 0.0001864614497213075, "loss": 2.9333, "step": 6630 }, { "epoch": 0.20223079179855535, "grad_norm": 30.625, "learning_rate": 0.00018641456188654842, "loss": 2.977, "step": 6640 }, { "epoch": 0.20253535624403513, "grad_norm": 32.75, "learning_rate": 0.00018636759962647133, "loss": 2.9863, "step": 6650 }, { "epoch": 0.20283992068951487, "grad_norm": 31.25, "learning_rate": 0.00018632056298681596, "loss": 2.9111, "step": 6660 }, { "epoch": 0.2031444851349946, "grad_norm": 31.375, "learning_rate": 0.00018627345201339442, "loss": 2.9658, "step": 6670 }, { "epoch": 0.20344904958047438, "grad_norm": 32.5, "learning_rate": 0.0001862262667520913, "loss": 2.941, "step": 6680 }, { "epoch": 0.20375361402595413, "grad_norm": 32.5, "learning_rate": 0.00018617900724886359, "loss": 2.9295, "step": 6690 }, { "epoch": 0.20405817847143387, "grad_norm": 32.25, "learning_rate": 0.00018613167354974045, "loss": 2.9401, "step": 6700 }, { "epoch": 0.20436274291691364, "grad_norm": 30.25, "learning_rate": 0.0001860842657008234, "loss": 2.9244, "step": 6710 }, { "epoch": 0.20466730736239339, "grad_norm": 30.875, "learning_rate": 0.00018603678374828615, "loss": 2.9486, "step": 6720 }, { "epoch": 0.20497187180787313, "grad_norm": 31.5, "learning_rate": 0.00018598922773837458, "loss": 2.929, "step": 6730 }, { "epoch": 0.20527643625335287, "grad_norm": 31.125, "learning_rate": 0.00018594159771740673, "loss": 2.9509, "step": 6740 }, { "epoch": 0.20558100069883264, "grad_norm": 30.25, "learning_rate": 0.0001858938937317727, "loss": 2.9369, "step": 6750 }, { "epoch": 0.2058855651443124, "grad_norm": 30.625, "learning_rate": 0.0001858461158279346, "loss": 2.9277, "step": 6760 }, { "epoch": 0.20619012958979213, "grad_norm": 30.75, "learning_rate": 0.0001857982640524266, "loss": 2.9335, "step": 6770 }, { "epoch": 0.2064946940352719, "grad_norm": 32.0, "learning_rate": 0.0001857503384518548, "loss": 2.9578, "step": 6780 }, { "epoch": 0.20679925848075165, "grad_norm": 31.5, "learning_rate": 0.0001857023390728972, "loss": 2.9415, "step": 6790 }, { "epoch": 0.2071038229262314, "grad_norm": 31.625, "learning_rate": 0.00018565426596230355, "loss": 2.974, "step": 6800 }, { "epoch": 0.20740838737171116, "grad_norm": 32.75, "learning_rate": 0.0001856061191668956, "loss": 2.953, "step": 6810 }, { "epoch": 0.2077129518171909, "grad_norm": 32.25, "learning_rate": 0.00018555789873356677, "loss": 2.9581, "step": 6820 }, { "epoch": 0.20801751626267065, "grad_norm": 31.75, "learning_rate": 0.00018550960470928215, "loss": 2.9353, "step": 6830 }, { "epoch": 0.20832208070815042, "grad_norm": 30.375, "learning_rate": 0.0001854612371410786, "loss": 2.956, "step": 6840 }, { "epoch": 0.20862664515363016, "grad_norm": 31.75, "learning_rate": 0.00018541279607606459, "loss": 2.9324, "step": 6850 }, { "epoch": 0.2089312095991099, "grad_norm": 31.375, "learning_rate": 0.0001853642815614201, "loss": 2.9334, "step": 6860 }, { "epoch": 0.20923577404458965, "grad_norm": 30.625, "learning_rate": 0.00018531569364439677, "loss": 2.9074, "step": 6870 }, { "epoch": 0.20954033849006942, "grad_norm": 31.125, "learning_rate": 0.00018526703237231764, "loss": 2.9273, "step": 6880 }, { "epoch": 0.20984490293554917, "grad_norm": 31.625, "learning_rate": 0.0001852182977925772, "loss": 2.9272, "step": 6890 }, { "epoch": 0.2101494673810289, "grad_norm": 33.5, "learning_rate": 0.00018516948995264134, "loss": 2.9255, "step": 6900 }, { "epoch": 0.21045403182650868, "grad_norm": 33.25, "learning_rate": 0.00018512060890004737, "loss": 2.9664, "step": 6910 }, { "epoch": 0.21075859627198842, "grad_norm": 32.5, "learning_rate": 0.0001850716546824038, "loss": 3.0032, "step": 6920 }, { "epoch": 0.21106316071746817, "grad_norm": 30.75, "learning_rate": 0.00018502262734739055, "loss": 2.9287, "step": 6930 }, { "epoch": 0.21136772516294794, "grad_norm": 32.0, "learning_rate": 0.0001849735269427586, "loss": 2.9235, "step": 6940 }, { "epoch": 0.21167228960842768, "grad_norm": 31.5, "learning_rate": 0.00018492435351633013, "loss": 2.9297, "step": 6950 }, { "epoch": 0.21197685405390743, "grad_norm": 31.625, "learning_rate": 0.00018487510711599853, "loss": 2.9428, "step": 6960 }, { "epoch": 0.21228141849938717, "grad_norm": 35.0, "learning_rate": 0.00018482578778972818, "loss": 2.9602, "step": 6970 }, { "epoch": 0.21258598294486694, "grad_norm": 30.875, "learning_rate": 0.00018477639558555451, "loss": 2.9417, "step": 6980 }, { "epoch": 0.21289054739034668, "grad_norm": 31.375, "learning_rate": 0.00018472693055158397, "loss": 2.9376, "step": 6990 }, { "epoch": 0.21319511183582643, "grad_norm": 30.75, "learning_rate": 0.00018467739273599383, "loss": 2.9433, "step": 7000 }, { "epoch": 0.2134996762813062, "grad_norm": 34.5, "learning_rate": 0.0001846277821870324, "loss": 2.9415, "step": 7010 }, { "epoch": 0.21380424072678594, "grad_norm": 30.75, "learning_rate": 0.0001845780989530187, "loss": 2.9283, "step": 7020 }, { "epoch": 0.2141088051722657, "grad_norm": 31.125, "learning_rate": 0.00018452834308234267, "loss": 2.9346, "step": 7030 }, { "epoch": 0.21441336961774546, "grad_norm": 30.875, "learning_rate": 0.00018447851462346485, "loss": 2.9352, "step": 7040 }, { "epoch": 0.2147179340632252, "grad_norm": 32.0, "learning_rate": 0.00018442861362491658, "loss": 2.9406, "step": 7050 }, { "epoch": 0.21502249850870495, "grad_norm": 31.375, "learning_rate": 0.00018437864013529982, "loss": 2.927, "step": 7060 }, { "epoch": 0.21532706295418472, "grad_norm": 30.625, "learning_rate": 0.00018432859420328712, "loss": 2.9134, "step": 7070 }, { "epoch": 0.21563162739966446, "grad_norm": 31.125, "learning_rate": 0.00018427847587762164, "loss": 2.9522, "step": 7080 }, { "epoch": 0.2159361918451442, "grad_norm": 31.625, "learning_rate": 0.00018422828520711697, "loss": 2.9197, "step": 7090 }, { "epoch": 0.21624075629062395, "grad_norm": 31.0, "learning_rate": 0.00018417802224065724, "loss": 2.9274, "step": 7100 }, { "epoch": 0.21654532073610372, "grad_norm": 30.125, "learning_rate": 0.00018412768702719695, "loss": 2.9397, "step": 7110 }, { "epoch": 0.21684988518158346, "grad_norm": 32.25, "learning_rate": 0.00018407727961576096, "loss": 2.9155, "step": 7120 }, { "epoch": 0.2171544496270632, "grad_norm": 34.5, "learning_rate": 0.00018402680005544445, "loss": 2.9937, "step": 7130 }, { "epoch": 0.21745901407254298, "grad_norm": 33.0, "learning_rate": 0.0001839762483954129, "loss": 2.9555, "step": 7140 }, { "epoch": 0.21776357851802272, "grad_norm": 30.25, "learning_rate": 0.00018392562468490204, "loss": 2.9254, "step": 7150 }, { "epoch": 0.21806814296350246, "grad_norm": 30.75, "learning_rate": 0.00018387492897321768, "loss": 2.9155, "step": 7160 }, { "epoch": 0.21837270740898224, "grad_norm": 30.5, "learning_rate": 0.0001838241613097358, "loss": 2.9225, "step": 7170 }, { "epoch": 0.21867727185446198, "grad_norm": 32.0, "learning_rate": 0.00018377332174390247, "loss": 2.9194, "step": 7180 }, { "epoch": 0.21898183629994172, "grad_norm": 31.75, "learning_rate": 0.00018372241032523382, "loss": 2.9358, "step": 7190 }, { "epoch": 0.21928640074542147, "grad_norm": 31.375, "learning_rate": 0.00018367142710331593, "loss": 2.9432, "step": 7200 }, { "epoch": 0.21959096519090124, "grad_norm": 31.625, "learning_rate": 0.00018362037212780476, "loss": 2.9358, "step": 7210 }, { "epoch": 0.21989552963638098, "grad_norm": 31.125, "learning_rate": 0.00018356924544842628, "loss": 2.9015, "step": 7220 }, { "epoch": 0.22020009408186073, "grad_norm": 52.5, "learning_rate": 0.00018351804711497615, "loss": 2.9361, "step": 7230 }, { "epoch": 0.2205046585273405, "grad_norm": 30.375, "learning_rate": 0.00018346677717731994, "loss": 2.9293, "step": 7240 }, { "epoch": 0.22080922297282024, "grad_norm": 32.5, "learning_rate": 0.00018341543568539293, "loss": 2.8921, "step": 7250 }, { "epoch": 0.22111378741829998, "grad_norm": 31.25, "learning_rate": 0.00018336402268920003, "loss": 2.8875, "step": 7260 }, { "epoch": 0.22141835186377976, "grad_norm": 30.875, "learning_rate": 0.00018331253823881586, "loss": 2.9668, "step": 7270 }, { "epoch": 0.2217229163092595, "grad_norm": 32.5, "learning_rate": 0.00018326098238438465, "loss": 2.9405, "step": 7280 }, { "epoch": 0.22202748075473924, "grad_norm": 31.375, "learning_rate": 0.0001832093551761201, "loss": 2.9545, "step": 7290 }, { "epoch": 0.22233204520021901, "grad_norm": 31.5, "learning_rate": 0.00018315765666430546, "loss": 2.9401, "step": 7300 }, { "epoch": 0.22263660964569876, "grad_norm": 33.0, "learning_rate": 0.0001831058868992934, "loss": 2.8944, "step": 7310 }, { "epoch": 0.2229411740911785, "grad_norm": 29.75, "learning_rate": 0.00018305404593150598, "loss": 2.9125, "step": 7320 }, { "epoch": 0.22324573853665824, "grad_norm": 30.875, "learning_rate": 0.0001830021338114347, "loss": 2.9337, "step": 7330 }, { "epoch": 0.22355030298213802, "grad_norm": 31.375, "learning_rate": 0.00018295015058964023, "loss": 2.9302, "step": 7340 }, { "epoch": 0.22385486742761776, "grad_norm": 31.875, "learning_rate": 0.00018289809631675253, "loss": 2.9322, "step": 7350 }, { "epoch": 0.2241594318730975, "grad_norm": 32.5, "learning_rate": 0.00018284597104347087, "loss": 2.8861, "step": 7360 }, { "epoch": 0.22446399631857727, "grad_norm": 30.0, "learning_rate": 0.00018279377482056355, "loss": 2.9331, "step": 7370 }, { "epoch": 0.22476856076405702, "grad_norm": 33.0, "learning_rate": 0.00018274150769886796, "loss": 2.9475, "step": 7380 }, { "epoch": 0.22507312520953676, "grad_norm": 30.125, "learning_rate": 0.0001826891697292906, "loss": 2.9149, "step": 7390 }, { "epoch": 0.22537768965501653, "grad_norm": 30.625, "learning_rate": 0.00018263676096280703, "loss": 2.9428, "step": 7400 }, { "epoch": 0.22568225410049628, "grad_norm": 32.5, "learning_rate": 0.00018258428145046164, "loss": 2.9385, "step": 7410 }, { "epoch": 0.22598681854597602, "grad_norm": 30.25, "learning_rate": 0.00018253173124336777, "loss": 2.9466, "step": 7420 }, { "epoch": 0.22629138299145576, "grad_norm": 29.5, "learning_rate": 0.0001824791103927077, "loss": 2.8978, "step": 7430 }, { "epoch": 0.22659594743693554, "grad_norm": 31.375, "learning_rate": 0.00018242641894973237, "loss": 2.9532, "step": 7440 }, { "epoch": 0.22690051188241528, "grad_norm": 30.625, "learning_rate": 0.00018237365696576155, "loss": 2.9337, "step": 7450 }, { "epoch": 0.22720507632789502, "grad_norm": 30.625, "learning_rate": 0.00018232082449218376, "loss": 2.9381, "step": 7460 }, { "epoch": 0.2275096407733748, "grad_norm": 30.25, "learning_rate": 0.00018226792158045608, "loss": 2.9448, "step": 7470 }, { "epoch": 0.22781420521885454, "grad_norm": 30.5, "learning_rate": 0.00018221494828210425, "loss": 2.9451, "step": 7480 }, { "epoch": 0.22811876966433428, "grad_norm": 31.25, "learning_rate": 0.00018216190464872257, "loss": 2.953, "step": 7490 }, { "epoch": 0.22842333410981405, "grad_norm": 30.375, "learning_rate": 0.0001821087907319738, "loss": 2.927, "step": 7500 }, { "epoch": 0.2287278985552938, "grad_norm": 29.375, "learning_rate": 0.0001820556065835892, "loss": 2.9706, "step": 7510 }, { "epoch": 0.22903246300077354, "grad_norm": 30.75, "learning_rate": 0.00018200235225536844, "loss": 2.953, "step": 7520 }, { "epoch": 0.2293370274462533, "grad_norm": 30.875, "learning_rate": 0.00018194902779917945, "loss": 2.9552, "step": 7530 }, { "epoch": 0.22964159189173305, "grad_norm": 33.0, "learning_rate": 0.00018189563326695857, "loss": 2.9377, "step": 7540 }, { "epoch": 0.2299461563372128, "grad_norm": 30.5, "learning_rate": 0.00018184216871071037, "loss": 2.9266, "step": 7550 }, { "epoch": 0.23025072078269254, "grad_norm": 30.625, "learning_rate": 0.0001817886341825076, "loss": 2.9257, "step": 7560 }, { "epoch": 0.2305552852281723, "grad_norm": 30.125, "learning_rate": 0.00018173502973449108, "loss": 2.9467, "step": 7570 }, { "epoch": 0.23085984967365206, "grad_norm": 31.375, "learning_rate": 0.00018168135541886992, "loss": 2.9211, "step": 7580 }, { "epoch": 0.2311644141191318, "grad_norm": 32.25, "learning_rate": 0.00018162761128792115, "loss": 2.9215, "step": 7590 }, { "epoch": 0.23146897856461157, "grad_norm": 30.0, "learning_rate": 0.0001815737973939898, "loss": 2.9301, "step": 7600 }, { "epoch": 0.23177354301009132, "grad_norm": 31.5, "learning_rate": 0.00018151991378948881, "loss": 2.9512, "step": 7610 }, { "epoch": 0.23207810745557106, "grad_norm": 34.0, "learning_rate": 0.0001814659605268992, "loss": 2.9227, "step": 7620 }, { "epoch": 0.23238267190105083, "grad_norm": 30.75, "learning_rate": 0.00018141193765876961, "loss": 2.9136, "step": 7630 }, { "epoch": 0.23268723634653057, "grad_norm": 32.0, "learning_rate": 0.0001813578452377166, "loss": 2.9351, "step": 7640 }, { "epoch": 0.23299180079201032, "grad_norm": 29.875, "learning_rate": 0.00018130368331642442, "loss": 2.9391, "step": 7650 }, { "epoch": 0.2332963652374901, "grad_norm": 31.375, "learning_rate": 0.00018124945194764508, "loss": 2.9072, "step": 7660 }, { "epoch": 0.23360092968296983, "grad_norm": 31.125, "learning_rate": 0.00018119515118419815, "loss": 2.8898, "step": 7670 }, { "epoch": 0.23390549412844958, "grad_norm": 32.5, "learning_rate": 0.0001811407810789708, "loss": 2.9098, "step": 7680 }, { "epoch": 0.23421005857392932, "grad_norm": 32.75, "learning_rate": 0.00018108634168491786, "loss": 2.9213, "step": 7690 }, { "epoch": 0.2345146230194091, "grad_norm": 30.875, "learning_rate": 0.0001810318330550614, "loss": 2.9115, "step": 7700 }, { "epoch": 0.23481918746488883, "grad_norm": 31.25, "learning_rate": 0.00018097725524249118, "loss": 2.9362, "step": 7710 }, { "epoch": 0.23512375191036858, "grad_norm": 30.75, "learning_rate": 0.00018092260830036418, "loss": 2.897, "step": 7720 }, { "epoch": 0.23542831635584835, "grad_norm": 30.125, "learning_rate": 0.00018086789228190478, "loss": 2.945, "step": 7730 }, { "epoch": 0.2357328808013281, "grad_norm": 31.125, "learning_rate": 0.00018081310724040456, "loss": 2.9001, "step": 7740 }, { "epoch": 0.23603744524680784, "grad_norm": 31.5, "learning_rate": 0.0001807582532292225, "loss": 2.9358, "step": 7750 }, { "epoch": 0.2363420096922876, "grad_norm": 30.5, "learning_rate": 0.0001807033303017845, "loss": 2.9109, "step": 7760 }, { "epoch": 0.23664657413776735, "grad_norm": 31.625, "learning_rate": 0.00018064833851158378, "loss": 2.9152, "step": 7770 }, { "epoch": 0.2369511385832471, "grad_norm": 31.0, "learning_rate": 0.0001805932779121806, "loss": 2.914, "step": 7780 }, { "epoch": 0.23725570302872684, "grad_norm": 31.625, "learning_rate": 0.00018053814855720218, "loss": 2.9286, "step": 7790 }, { "epoch": 0.2375602674742066, "grad_norm": 32.75, "learning_rate": 0.00018048295050034272, "loss": 2.9509, "step": 7800 }, { "epoch": 0.23786483191968635, "grad_norm": 30.5, "learning_rate": 0.00018042768379536336, "loss": 2.9048, "step": 7810 }, { "epoch": 0.2381693963651661, "grad_norm": 33.0, "learning_rate": 0.0001803723484960921, "loss": 2.9196, "step": 7820 }, { "epoch": 0.23847396081064587, "grad_norm": 31.625, "learning_rate": 0.00018031694465642372, "loss": 2.9368, "step": 7830 }, { "epoch": 0.2387785252561256, "grad_norm": 32.5, "learning_rate": 0.00018026147233031977, "loss": 2.9015, "step": 7840 }, { "epoch": 0.23908308970160536, "grad_norm": 31.625, "learning_rate": 0.0001802059315718085, "loss": 2.9316, "step": 7850 }, { "epoch": 0.23938765414708513, "grad_norm": 30.0, "learning_rate": 0.00018015032243498487, "loss": 2.9333, "step": 7860 }, { "epoch": 0.23969221859256487, "grad_norm": 30.875, "learning_rate": 0.0001800946449740103, "loss": 2.9627, "step": 7870 }, { "epoch": 0.23999678303804461, "grad_norm": 30.375, "learning_rate": 0.0001800388992431129, "loss": 2.9034, "step": 7880 }, { "epoch": 0.24030134748352439, "grad_norm": 32.25, "learning_rate": 0.00017998308529658722, "loss": 2.9115, "step": 7890 }, { "epoch": 0.24060591192900413, "grad_norm": 31.25, "learning_rate": 0.00017992720318879416, "loss": 2.9071, "step": 7900 }, { "epoch": 0.24091047637448387, "grad_norm": 32.0, "learning_rate": 0.00017987125297416122, "loss": 2.9518, "step": 7910 }, { "epoch": 0.24121504081996362, "grad_norm": 31.25, "learning_rate": 0.000179815234707182, "loss": 2.9437, "step": 7920 }, { "epoch": 0.2415196052654434, "grad_norm": 31.375, "learning_rate": 0.00017975914844241655, "loss": 2.9287, "step": 7930 }, { "epoch": 0.24182416971092313, "grad_norm": 31.375, "learning_rate": 0.000179702994234491, "loss": 2.9398, "step": 7940 }, { "epoch": 0.24212873415640287, "grad_norm": 31.0, "learning_rate": 0.0001796467721380978, "loss": 2.9469, "step": 7950 }, { "epoch": 0.24243329860188265, "grad_norm": 31.5, "learning_rate": 0.0001795904822079955, "loss": 2.8721, "step": 7960 }, { "epoch": 0.2427378630473624, "grad_norm": 29.625, "learning_rate": 0.00017953412449900862, "loss": 2.966, "step": 7970 }, { "epoch": 0.24304242749284213, "grad_norm": 31.125, "learning_rate": 0.0001794776990660277, "loss": 2.8951, "step": 7980 }, { "epoch": 0.2433469919383219, "grad_norm": 34.5, "learning_rate": 0.00017942120596400939, "loss": 2.9025, "step": 7990 }, { "epoch": 0.24365155638380165, "grad_norm": 32.25, "learning_rate": 0.00017936464524797606, "loss": 2.9438, "step": 8000 }, { "epoch": 0.2439561208292814, "grad_norm": 31.5, "learning_rate": 0.0001793080169730161, "loss": 2.9696, "step": 8010 }, { "epoch": 0.24426068527476114, "grad_norm": 30.375, "learning_rate": 0.00017925132119428364, "loss": 2.9257, "step": 8020 }, { "epoch": 0.2445652497202409, "grad_norm": 32.25, "learning_rate": 0.0001791945579669984, "loss": 2.9383, "step": 8030 }, { "epoch": 0.24486981416572065, "grad_norm": 31.25, "learning_rate": 0.00017913772734644604, "loss": 2.9157, "step": 8040 }, { "epoch": 0.2451743786112004, "grad_norm": 31.75, "learning_rate": 0.00017908082938797776, "loss": 2.941, "step": 8050 }, { "epoch": 0.24547894305668017, "grad_norm": 33.25, "learning_rate": 0.00017902386414701032, "loss": 2.8919, "step": 8060 }, { "epoch": 0.2457835075021599, "grad_norm": 31.0, "learning_rate": 0.000178966831679026, "loss": 2.9069, "step": 8070 }, { "epoch": 0.24608807194763965, "grad_norm": 30.75, "learning_rate": 0.0001789097320395726, "loss": 2.9511, "step": 8080 }, { "epoch": 0.24639263639311942, "grad_norm": 30.875, "learning_rate": 0.00017885256528426332, "loss": 2.8817, "step": 8090 }, { "epoch": 0.24669720083859917, "grad_norm": 30.125, "learning_rate": 0.00017879533146877673, "loss": 2.9376, "step": 8100 }, { "epoch": 0.2470017652840789, "grad_norm": 30.5, "learning_rate": 0.00017873803064885675, "loss": 2.9186, "step": 8110 }, { "epoch": 0.24730632972955868, "grad_norm": 31.875, "learning_rate": 0.00017868066288031245, "loss": 2.9106, "step": 8120 }, { "epoch": 0.24761089417503843, "grad_norm": 31.625, "learning_rate": 0.00017862322821901832, "loss": 2.9432, "step": 8130 }, { "epoch": 0.24791545862051817, "grad_norm": 31.5, "learning_rate": 0.0001785657267209138, "loss": 2.8944, "step": 8140 }, { "epoch": 0.2482200230659979, "grad_norm": 30.875, "learning_rate": 0.00017850815844200346, "loss": 2.9357, "step": 8150 }, { "epoch": 0.24852458751147768, "grad_norm": 32.75, "learning_rate": 0.00017845052343835698, "loss": 2.9109, "step": 8160 }, { "epoch": 0.24882915195695743, "grad_norm": 32.75, "learning_rate": 0.00017839282176610897, "loss": 2.9373, "step": 8170 }, { "epoch": 0.24913371640243717, "grad_norm": 31.625, "learning_rate": 0.00017833505348145901, "loss": 2.9203, "step": 8180 }, { "epoch": 0.24943828084791694, "grad_norm": 30.875, "learning_rate": 0.00017827721864067158, "loss": 2.9024, "step": 8190 }, { "epoch": 0.2497428452933967, "grad_norm": 30.625, "learning_rate": 0.00017821931730007594, "loss": 2.9139, "step": 8200 }, { "epoch": 0.25004740973887646, "grad_norm": 32.25, "learning_rate": 0.00017816134951606608, "loss": 2.9533, "step": 8210 }, { "epoch": 0.2503519741843562, "grad_norm": 31.125, "learning_rate": 0.00017810331534510077, "loss": 2.9364, "step": 8220 }, { "epoch": 0.25065653862983595, "grad_norm": 32.75, "learning_rate": 0.00017804521484370345, "loss": 2.9111, "step": 8230 }, { "epoch": 0.2509611030753157, "grad_norm": 30.25, "learning_rate": 0.00017798704806846208, "loss": 2.9484, "step": 8240 }, { "epoch": 0.25126566752079543, "grad_norm": 31.125, "learning_rate": 0.00017792881507602932, "loss": 2.8765, "step": 8250 }, { "epoch": 0.2515702319662752, "grad_norm": 29.0, "learning_rate": 0.00017787051592312213, "loss": 2.8946, "step": 8260 }, { "epoch": 0.251874796411755, "grad_norm": 32.75, "learning_rate": 0.00017781215066652205, "loss": 2.9435, "step": 8270 }, { "epoch": 0.2521793608572347, "grad_norm": 32.0, "learning_rate": 0.0001777537193630749, "loss": 2.9265, "step": 8280 }, { "epoch": 0.25248392530271446, "grad_norm": 30.75, "learning_rate": 0.00017769522206969096, "loss": 2.8837, "step": 8290 }, { "epoch": 0.25278848974819423, "grad_norm": 31.25, "learning_rate": 0.00017763665884334466, "loss": 2.9007, "step": 8300 }, { "epoch": 0.25309305419367395, "grad_norm": 33.25, "learning_rate": 0.00017757802974107471, "loss": 2.9137, "step": 8310 }, { "epoch": 0.2533976186391537, "grad_norm": 30.875, "learning_rate": 0.000177519334819984, "loss": 2.9026, "step": 8320 }, { "epoch": 0.25370218308463344, "grad_norm": 30.75, "learning_rate": 0.0001774605741372394, "loss": 2.8797, "step": 8330 }, { "epoch": 0.2540067475301132, "grad_norm": 30.25, "learning_rate": 0.000177401747750072, "loss": 2.8987, "step": 8340 }, { "epoch": 0.254311311975593, "grad_norm": 30.375, "learning_rate": 0.00017734285571577679, "loss": 2.8665, "step": 8350 }, { "epoch": 0.2546158764210727, "grad_norm": 31.125, "learning_rate": 0.00017728389809171263, "loss": 2.9332, "step": 8360 }, { "epoch": 0.25492044086655247, "grad_norm": 31.125, "learning_rate": 0.00017722487493530245, "loss": 2.9664, "step": 8370 }, { "epoch": 0.25522500531203224, "grad_norm": 30.125, "learning_rate": 0.00017716578630403286, "loss": 2.9183, "step": 8380 }, { "epoch": 0.25552956975751195, "grad_norm": 32.0, "learning_rate": 0.0001771066322554543, "loss": 2.9226, "step": 8390 }, { "epoch": 0.2558341342029917, "grad_norm": 30.875, "learning_rate": 0.00017704741284718084, "loss": 2.8937, "step": 8400 }, { "epoch": 0.2561386986484715, "grad_norm": 32.25, "learning_rate": 0.00017698812813689033, "loss": 2.9142, "step": 8410 }, { "epoch": 0.2564432630939512, "grad_norm": 33.25, "learning_rate": 0.0001769287781823242, "loss": 2.8872, "step": 8420 }, { "epoch": 0.256747827539431, "grad_norm": 30.125, "learning_rate": 0.0001768693630412873, "loss": 2.9042, "step": 8430 }, { "epoch": 0.25705239198491076, "grad_norm": 30.375, "learning_rate": 0.00017680988277164815, "loss": 2.9317, "step": 8440 }, { "epoch": 0.25735695643039047, "grad_norm": 31.0, "learning_rate": 0.00017675033743133853, "loss": 2.9173, "step": 8450 }, { "epoch": 0.25766152087587024, "grad_norm": 30.875, "learning_rate": 0.0001766907270783537, "loss": 2.9037, "step": 8460 }, { "epoch": 0.25796608532135, "grad_norm": 33.0, "learning_rate": 0.00017663105177075227, "loss": 2.9342, "step": 8470 }, { "epoch": 0.25827064976682973, "grad_norm": 31.75, "learning_rate": 0.00017657131156665603, "loss": 2.8872, "step": 8480 }, { "epoch": 0.2585752142123095, "grad_norm": 30.375, "learning_rate": 0.00017651150652425002, "loss": 2.93, "step": 8490 }, { "epoch": 0.2588797786577893, "grad_norm": 31.125, "learning_rate": 0.00017645163670178238, "loss": 2.8732, "step": 8500 }, { "epoch": 0.259184343103269, "grad_norm": 30.25, "learning_rate": 0.00017639170215756446, "loss": 2.9491, "step": 8510 }, { "epoch": 0.25948890754874876, "grad_norm": 31.625, "learning_rate": 0.00017633170294997053, "loss": 2.8975, "step": 8520 }, { "epoch": 0.25979347199422853, "grad_norm": 31.25, "learning_rate": 0.0001762716391374379, "loss": 2.9153, "step": 8530 }, { "epoch": 0.26009803643970825, "grad_norm": 31.75, "learning_rate": 0.00017621151077846676, "loss": 2.9419, "step": 8540 }, { "epoch": 0.260402600885188, "grad_norm": 30.875, "learning_rate": 0.0001761513179316202, "loss": 2.897, "step": 8550 }, { "epoch": 0.26070716533066773, "grad_norm": 30.625, "learning_rate": 0.00017609106065552413, "loss": 2.8898, "step": 8560 }, { "epoch": 0.2610117297761475, "grad_norm": 33.25, "learning_rate": 0.00017603073900886718, "loss": 2.8903, "step": 8570 }, { "epoch": 0.2613162942216273, "grad_norm": 30.5, "learning_rate": 0.0001759703530504007, "loss": 2.8983, "step": 8580 }, { "epoch": 0.261620858667107, "grad_norm": 30.875, "learning_rate": 0.00017590990283893862, "loss": 2.9179, "step": 8590 }, { "epoch": 0.26192542311258676, "grad_norm": 30.625, "learning_rate": 0.00017584938843335753, "loss": 2.9138, "step": 8600 }, { "epoch": 0.26222998755806654, "grad_norm": 31.25, "learning_rate": 0.00017578880989259653, "loss": 2.8865, "step": 8610 }, { "epoch": 0.26253455200354625, "grad_norm": 30.5, "learning_rate": 0.0001757281672756571, "loss": 2.9129, "step": 8620 }, { "epoch": 0.262839116449026, "grad_norm": 30.75, "learning_rate": 0.0001756674606416033, "loss": 2.9368, "step": 8630 }, { "epoch": 0.2631436808945058, "grad_norm": 32.0, "learning_rate": 0.00017560669004956133, "loss": 2.91, "step": 8640 }, { "epoch": 0.2634482453399855, "grad_norm": 30.75, "learning_rate": 0.00017554585555871983, "loss": 2.8586, "step": 8650 }, { "epoch": 0.2637528097854653, "grad_norm": 31.875, "learning_rate": 0.0001754849572283296, "loss": 2.8506, "step": 8660 }, { "epoch": 0.26405737423094505, "grad_norm": 31.25, "learning_rate": 0.0001754239951177037, "loss": 2.9506, "step": 8670 }, { "epoch": 0.26436193867642477, "grad_norm": 31.875, "learning_rate": 0.0001753629692862172, "loss": 2.8699, "step": 8680 }, { "epoch": 0.26466650312190454, "grad_norm": 30.625, "learning_rate": 0.00017530187979330737, "loss": 2.8948, "step": 8690 }, { "epoch": 0.2649710675673843, "grad_norm": 31.25, "learning_rate": 0.00017524072669847335, "loss": 2.9008, "step": 8700 }, { "epoch": 0.265275632012864, "grad_norm": 30.625, "learning_rate": 0.00017517951006127625, "loss": 2.904, "step": 8710 }, { "epoch": 0.2655801964583438, "grad_norm": 30.0, "learning_rate": 0.0001751182299413392, "loss": 2.899, "step": 8720 }, { "epoch": 0.26588476090382357, "grad_norm": 32.25, "learning_rate": 0.00017505688639834693, "loss": 2.923, "step": 8730 }, { "epoch": 0.2661893253493033, "grad_norm": 30.25, "learning_rate": 0.00017499547949204622, "loss": 2.867, "step": 8740 }, { "epoch": 0.26649388979478306, "grad_norm": 30.75, "learning_rate": 0.0001749340092822453, "loss": 2.9049, "step": 8750 }, { "epoch": 0.26679845424026283, "grad_norm": 32.5, "learning_rate": 0.0001748724758288142, "loss": 2.9284, "step": 8760 }, { "epoch": 0.26710301868574254, "grad_norm": 30.25, "learning_rate": 0.00017481087919168453, "loss": 2.9299, "step": 8770 }, { "epoch": 0.2674075831312223, "grad_norm": 29.875, "learning_rate": 0.00017474921943084944, "loss": 2.8942, "step": 8780 }, { "epoch": 0.26771214757670203, "grad_norm": 31.125, "learning_rate": 0.0001746874966063635, "loss": 2.9052, "step": 8790 }, { "epoch": 0.2680167120221818, "grad_norm": 31.5, "learning_rate": 0.00017462571077834282, "loss": 2.9351, "step": 8800 }, { "epoch": 0.2683212764676616, "grad_norm": 31.375, "learning_rate": 0.00017456386200696473, "loss": 2.9342, "step": 8810 }, { "epoch": 0.2686258409131413, "grad_norm": 31.875, "learning_rate": 0.000174501950352468, "loss": 2.898, "step": 8820 }, { "epoch": 0.26893040535862106, "grad_norm": 30.625, "learning_rate": 0.00017443997587515255, "loss": 2.9193, "step": 8830 }, { "epoch": 0.26923496980410083, "grad_norm": 31.5, "learning_rate": 0.00017437793863537952, "loss": 2.9048, "step": 8840 }, { "epoch": 0.26953953424958055, "grad_norm": 31.25, "learning_rate": 0.0001743158386935712, "loss": 2.8984, "step": 8850 }, { "epoch": 0.2698440986950603, "grad_norm": 30.75, "learning_rate": 0.00017425367611021092, "loss": 2.8838, "step": 8860 }, { "epoch": 0.2701486631405401, "grad_norm": 30.5, "learning_rate": 0.000174191450945843, "loss": 2.8965, "step": 8870 }, { "epoch": 0.2704532275860198, "grad_norm": 31.375, "learning_rate": 0.00017412916326107282, "loss": 2.8784, "step": 8880 }, { "epoch": 0.2707577920314996, "grad_norm": 30.5, "learning_rate": 0.00017406681311656655, "loss": 2.8998, "step": 8890 }, { "epoch": 0.27106235647697935, "grad_norm": 30.75, "learning_rate": 0.0001740044005730511, "loss": 2.9204, "step": 8900 }, { "epoch": 0.27136692092245907, "grad_norm": 30.375, "learning_rate": 0.00017394192569131445, "loss": 2.9584, "step": 8910 }, { "epoch": 0.27167148536793884, "grad_norm": 31.125, "learning_rate": 0.000173879388532205, "loss": 2.8987, "step": 8920 }, { "epoch": 0.2719760498134186, "grad_norm": 30.75, "learning_rate": 0.00017381678915663197, "loss": 2.8988, "step": 8930 }, { "epoch": 0.2722806142588983, "grad_norm": 31.0, "learning_rate": 0.00017375412762556515, "loss": 2.9199, "step": 8940 }, { "epoch": 0.2725851787043781, "grad_norm": 30.75, "learning_rate": 0.00017369140400003483, "loss": 2.9087, "step": 8950 }, { "epoch": 0.27288974314985787, "grad_norm": 36.0, "learning_rate": 0.00017362861834113177, "loss": 2.941, "step": 8960 }, { "epoch": 0.2731943075953376, "grad_norm": 30.0, "learning_rate": 0.00017356577071000722, "loss": 2.902, "step": 8970 }, { "epoch": 0.27349887204081735, "grad_norm": 31.875, "learning_rate": 0.00017350286116787274, "loss": 2.9243, "step": 8980 }, { "epoch": 0.2738034364862971, "grad_norm": 30.5, "learning_rate": 0.00017343988977600014, "loss": 2.8832, "step": 8990 }, { "epoch": 0.27410800093177684, "grad_norm": 32.5, "learning_rate": 0.0001733768565957216, "loss": 2.9049, "step": 9000 }, { "epoch": 0.2744125653772566, "grad_norm": 31.0, "learning_rate": 0.00017331376168842937, "loss": 2.9115, "step": 9010 }, { "epoch": 0.27471712982273633, "grad_norm": 31.375, "learning_rate": 0.00017325060511557582, "loss": 2.9113, "step": 9020 }, { "epoch": 0.2750216942682161, "grad_norm": 30.875, "learning_rate": 0.00017318738693867348, "loss": 2.93, "step": 9030 }, { "epoch": 0.27532625871369587, "grad_norm": 31.0, "learning_rate": 0.00017312410721929475, "loss": 2.908, "step": 9040 }, { "epoch": 0.2756308231591756, "grad_norm": 30.875, "learning_rate": 0.0001730607660190721, "loss": 2.9531, "step": 9050 }, { "epoch": 0.27593538760465536, "grad_norm": 31.25, "learning_rate": 0.0001729973633996978, "loss": 2.8782, "step": 9060 }, { "epoch": 0.27623995205013513, "grad_norm": 31.625, "learning_rate": 0.0001729338994229239, "loss": 2.8754, "step": 9070 }, { "epoch": 0.27654451649561484, "grad_norm": 30.25, "learning_rate": 0.0001728703741505623, "loss": 2.9034, "step": 9080 }, { "epoch": 0.2768490809410946, "grad_norm": 33.0, "learning_rate": 0.00017280678764448459, "loss": 2.9248, "step": 9090 }, { "epoch": 0.2771536453865744, "grad_norm": 32.75, "learning_rate": 0.00017274313996662195, "loss": 2.8524, "step": 9100 }, { "epoch": 0.2774582098320541, "grad_norm": 31.375, "learning_rate": 0.00017267943117896516, "loss": 2.908, "step": 9110 }, { "epoch": 0.2777627742775339, "grad_norm": 31.375, "learning_rate": 0.00017261566134356456, "loss": 2.9175, "step": 9120 }, { "epoch": 0.27806733872301365, "grad_norm": 31.875, "learning_rate": 0.00017255183052252988, "loss": 2.8972, "step": 9130 }, { "epoch": 0.27837190316849336, "grad_norm": 30.5, "learning_rate": 0.00017248793877803028, "loss": 2.8854, "step": 9140 }, { "epoch": 0.27867646761397313, "grad_norm": 32.0, "learning_rate": 0.00017242398617229427, "loss": 2.8861, "step": 9150 }, { "epoch": 0.2789810320594529, "grad_norm": 32.25, "learning_rate": 0.0001723599727676096, "loss": 2.8924, "step": 9160 }, { "epoch": 0.2792855965049326, "grad_norm": 30.75, "learning_rate": 0.00017229589862632333, "loss": 2.9314, "step": 9170 }, { "epoch": 0.2795901609504124, "grad_norm": 31.375, "learning_rate": 0.00017223176381084152, "loss": 2.9319, "step": 9180 }, { "epoch": 0.27989472539589216, "grad_norm": 32.25, "learning_rate": 0.00017216756838362948, "loss": 2.8902, "step": 9190 }, { "epoch": 0.2801992898413719, "grad_norm": 30.75, "learning_rate": 0.00017210331240721143, "loss": 2.8807, "step": 9200 }, { "epoch": 0.28050385428685165, "grad_norm": 30.625, "learning_rate": 0.00017203899594417065, "loss": 2.9049, "step": 9210 }, { "epoch": 0.2808084187323314, "grad_norm": 30.375, "learning_rate": 0.0001719746190571493, "loss": 2.9082, "step": 9220 }, { "epoch": 0.28111298317781114, "grad_norm": 31.375, "learning_rate": 0.00017191018180884836, "loss": 2.8924, "step": 9230 }, { "epoch": 0.2814175476232909, "grad_norm": 32.75, "learning_rate": 0.00017184568426202766, "loss": 2.9134, "step": 9240 }, { "epoch": 0.2817221120687706, "grad_norm": 30.375, "learning_rate": 0.00017178112647950574, "loss": 2.8794, "step": 9250 }, { "epoch": 0.2820266765142504, "grad_norm": 32.25, "learning_rate": 0.0001717165085241598, "loss": 2.8538, "step": 9260 }, { "epoch": 0.28233124095973017, "grad_norm": 31.625, "learning_rate": 0.0001716518304589256, "loss": 2.8957, "step": 9270 }, { "epoch": 0.2826358054052099, "grad_norm": 31.125, "learning_rate": 0.00017158709234679748, "loss": 2.8656, "step": 9280 }, { "epoch": 0.28294036985068965, "grad_norm": 31.0, "learning_rate": 0.00017152229425082836, "loss": 2.9048, "step": 9290 }, { "epoch": 0.2832449342961694, "grad_norm": 31.75, "learning_rate": 0.0001714574362341294, "loss": 2.9031, "step": 9300 }, { "epoch": 0.28354949874164914, "grad_norm": 30.875, "learning_rate": 0.0001713925183598703, "loss": 2.8851, "step": 9310 }, { "epoch": 0.2838540631871289, "grad_norm": 30.625, "learning_rate": 0.00017132754069127892, "loss": 2.8787, "step": 9320 }, { "epoch": 0.2841586276326087, "grad_norm": 32.25, "learning_rate": 0.00017126250329164145, "loss": 2.881, "step": 9330 }, { "epoch": 0.2844631920780884, "grad_norm": 31.25, "learning_rate": 0.00017119740622430216, "loss": 2.8966, "step": 9340 }, { "epoch": 0.28476775652356817, "grad_norm": 29.75, "learning_rate": 0.00017113224955266353, "loss": 2.881, "step": 9350 }, { "epoch": 0.28507232096904794, "grad_norm": 32.25, "learning_rate": 0.00017106703334018603, "loss": 2.9164, "step": 9360 }, { "epoch": 0.28537688541452766, "grad_norm": 31.625, "learning_rate": 0.00017100175765038823, "loss": 2.9037, "step": 9370 }, { "epoch": 0.28568144986000743, "grad_norm": 33.25, "learning_rate": 0.00017093642254684645, "loss": 2.8752, "step": 9380 }, { "epoch": 0.2859860143054872, "grad_norm": 29.75, "learning_rate": 0.00017087102809319504, "loss": 2.8958, "step": 9390 }, { "epoch": 0.2862905787509669, "grad_norm": 29.375, "learning_rate": 0.00017080557435312604, "loss": 2.8551, "step": 9400 }, { "epoch": 0.2865951431964467, "grad_norm": 32.75, "learning_rate": 0.00017074006139038928, "loss": 2.8962, "step": 9410 }, { "epoch": 0.28689970764192646, "grad_norm": 30.75, "learning_rate": 0.00017067448926879234, "loss": 2.8876, "step": 9420 }, { "epoch": 0.2872042720874062, "grad_norm": 31.5, "learning_rate": 0.00017060885805220027, "loss": 2.9011, "step": 9430 }, { "epoch": 0.28750883653288595, "grad_norm": 30.875, "learning_rate": 0.0001705431678045358, "loss": 2.8864, "step": 9440 }, { "epoch": 0.2878134009783657, "grad_norm": 31.625, "learning_rate": 0.0001704774185897791, "loss": 2.8848, "step": 9450 }, { "epoch": 0.28811796542384543, "grad_norm": 31.875, "learning_rate": 0.0001704116104719678, "loss": 2.874, "step": 9460 }, { "epoch": 0.2884225298693252, "grad_norm": 32.75, "learning_rate": 0.00017034574351519686, "loss": 2.9027, "step": 9470 }, { "epoch": 0.2887270943148049, "grad_norm": 30.0, "learning_rate": 0.00017027981778361856, "loss": 2.8681, "step": 9480 }, { "epoch": 0.2890316587602847, "grad_norm": 31.0, "learning_rate": 0.0001702138333414425, "loss": 2.9149, "step": 9490 }, { "epoch": 0.28933622320576446, "grad_norm": 31.375, "learning_rate": 0.00017014779025293532, "loss": 2.8804, "step": 9500 }, { "epoch": 0.2896407876512442, "grad_norm": 30.5, "learning_rate": 0.0001700816885824209, "loss": 2.8772, "step": 9510 }, { "epoch": 0.28994535209672395, "grad_norm": 30.0, "learning_rate": 0.00017001552839428013, "loss": 2.8601, "step": 9520 }, { "epoch": 0.2902499165422037, "grad_norm": 31.625, "learning_rate": 0.0001699493097529509, "loss": 2.8868, "step": 9530 }, { "epoch": 0.29055448098768344, "grad_norm": 30.75, "learning_rate": 0.000169883032722928, "loss": 2.8784, "step": 9540 }, { "epoch": 0.2908590454331632, "grad_norm": 30.875, "learning_rate": 0.0001698166973687632, "loss": 2.8813, "step": 9550 }, { "epoch": 0.291163609878643, "grad_norm": 30.5, "learning_rate": 0.0001697503037550649, "loss": 2.8973, "step": 9560 }, { "epoch": 0.2914681743241227, "grad_norm": 30.25, "learning_rate": 0.00016968385194649838, "loss": 2.8756, "step": 9570 }, { "epoch": 0.29177273876960247, "grad_norm": 31.25, "learning_rate": 0.0001696173420077856, "loss": 2.8786, "step": 9580 }, { "epoch": 0.29207730321508224, "grad_norm": 31.125, "learning_rate": 0.000169550774003705, "loss": 2.883, "step": 9590 }, { "epoch": 0.29238186766056196, "grad_norm": 31.125, "learning_rate": 0.0001694841479990917, "loss": 2.8874, "step": 9600 }, { "epoch": 0.2926864321060417, "grad_norm": 32.75, "learning_rate": 0.00016941746405883736, "loss": 2.8732, "step": 9610 }, { "epoch": 0.2929909965515215, "grad_norm": 31.5, "learning_rate": 0.0001693507222478899, "loss": 2.8923, "step": 9620 }, { "epoch": 0.2932955609970012, "grad_norm": 35.0, "learning_rate": 0.0001692839226312537, "loss": 2.8888, "step": 9630 }, { "epoch": 0.293600125442481, "grad_norm": 31.125, "learning_rate": 0.00016921706527398946, "loss": 2.8601, "step": 9640 }, { "epoch": 0.29390468988796076, "grad_norm": 32.0, "learning_rate": 0.00016915015024121407, "loss": 2.9158, "step": 9650 }, { "epoch": 0.2942092543334405, "grad_norm": 31.625, "learning_rate": 0.0001690831775981006, "loss": 2.8657, "step": 9660 }, { "epoch": 0.29451381877892024, "grad_norm": 33.0, "learning_rate": 0.0001690161474098783, "loss": 2.8919, "step": 9670 }, { "epoch": 0.2948183832244, "grad_norm": 31.875, "learning_rate": 0.00016894905974183236, "loss": 2.8881, "step": 9680 }, { "epoch": 0.29512294766987973, "grad_norm": 31.25, "learning_rate": 0.000168881914659304, "loss": 2.9094, "step": 9690 }, { "epoch": 0.2954275121153595, "grad_norm": 32.5, "learning_rate": 0.0001688147122276904, "loss": 2.8553, "step": 9700 }, { "epoch": 0.2957320765608392, "grad_norm": 31.25, "learning_rate": 0.00016874745251244452, "loss": 2.8715, "step": 9710 }, { "epoch": 0.296036641006319, "grad_norm": 30.75, "learning_rate": 0.00016868013557907522, "loss": 2.9067, "step": 9720 }, { "epoch": 0.29634120545179876, "grad_norm": 32.25, "learning_rate": 0.00016861276149314695, "loss": 2.9, "step": 9730 }, { "epoch": 0.2966457698972785, "grad_norm": 32.5, "learning_rate": 0.0001685453303202799, "loss": 2.9019, "step": 9740 }, { "epoch": 0.29695033434275825, "grad_norm": 31.875, "learning_rate": 0.0001684778421261499, "loss": 2.8705, "step": 9750 }, { "epoch": 0.297254898788238, "grad_norm": 31.625, "learning_rate": 0.00016841029697648826, "loss": 2.8942, "step": 9760 }, { "epoch": 0.29755946323371774, "grad_norm": 30.5, "learning_rate": 0.00016834269493708177, "loss": 2.8629, "step": 9770 }, { "epoch": 0.2978640276791975, "grad_norm": 32.75, "learning_rate": 0.00016827503607377261, "loss": 2.8633, "step": 9780 }, { "epoch": 0.2981685921246773, "grad_norm": 31.625, "learning_rate": 0.0001682073204524584, "loss": 2.9117, "step": 9790 }, { "epoch": 0.298473156570157, "grad_norm": 29.125, "learning_rate": 0.0001681395481390919, "loss": 2.893, "step": 9800 }, { "epoch": 0.29877772101563677, "grad_norm": 32.25, "learning_rate": 0.0001680717191996812, "loss": 2.871, "step": 9810 }, { "epoch": 0.29908228546111654, "grad_norm": 30.125, "learning_rate": 0.00016800383370028945, "loss": 2.8774, "step": 9820 }, { "epoch": 0.29938684990659625, "grad_norm": 30.5, "learning_rate": 0.00016793589170703498, "loss": 2.8592, "step": 9830 }, { "epoch": 0.299691414352076, "grad_norm": 30.875, "learning_rate": 0.00016786789328609108, "loss": 2.8845, "step": 9840 }, { "epoch": 0.2999959787975558, "grad_norm": 32.5, "learning_rate": 0.00016779983850368602, "loss": 2.9084, "step": 9850 }, { "epoch": 0.3003005432430355, "grad_norm": 31.25, "learning_rate": 0.00016773172742610298, "loss": 2.889, "step": 9860 }, { "epoch": 0.3006051076885153, "grad_norm": 29.75, "learning_rate": 0.00016766356011967994, "loss": 2.8533, "step": 9870 }, { "epoch": 0.30090967213399505, "grad_norm": 31.5, "learning_rate": 0.00016759533665080967, "loss": 2.9188, "step": 9880 }, { "epoch": 0.30121423657947477, "grad_norm": 33.25, "learning_rate": 0.0001675270570859396, "loss": 2.8966, "step": 9890 }, { "epoch": 0.30151880102495454, "grad_norm": 32.0, "learning_rate": 0.00016745872149157186, "loss": 2.8963, "step": 9900 }, { "epoch": 0.3018233654704343, "grad_norm": 34.75, "learning_rate": 0.00016739032993426308, "loss": 2.8711, "step": 9910 }, { "epoch": 0.30212792991591403, "grad_norm": 30.75, "learning_rate": 0.00016732188248062442, "loss": 2.8938, "step": 9920 }, { "epoch": 0.3024324943613938, "grad_norm": 32.5, "learning_rate": 0.00016725337919732154, "loss": 2.871, "step": 9930 }, { "epoch": 0.30273705880687357, "grad_norm": 31.375, "learning_rate": 0.0001671848201510744, "loss": 2.9031, "step": 9940 }, { "epoch": 0.3030416232523533, "grad_norm": 30.75, "learning_rate": 0.0001671162054086573, "loss": 2.8715, "step": 9950 }, { "epoch": 0.30334618769783306, "grad_norm": 30.625, "learning_rate": 0.0001670475350368988, "loss": 2.8797, "step": 9960 }, { "epoch": 0.3036507521433128, "grad_norm": 29.75, "learning_rate": 0.00016697880910268155, "loss": 2.8683, "step": 9970 }, { "epoch": 0.30395531658879255, "grad_norm": 31.25, "learning_rate": 0.0001669100276729425, "loss": 2.912, "step": 9980 }, { "epoch": 0.3042598810342723, "grad_norm": 30.5, "learning_rate": 0.00016684119081467256, "loss": 2.8712, "step": 9990 }, { "epoch": 0.30456444547975203, "grad_norm": 32.5, "learning_rate": 0.0001667722985949165, "loss": 2.8704, "step": 10000 }, { "epoch": 0.3048690099252318, "grad_norm": 31.0, "learning_rate": 0.00016670335108077316, "loss": 2.8883, "step": 10010 }, { "epoch": 0.3051735743707116, "grad_norm": 31.375, "learning_rate": 0.0001666343483393952, "loss": 2.8927, "step": 10020 }, { "epoch": 0.3054781388161913, "grad_norm": 31.25, "learning_rate": 0.00016656529043798906, "loss": 2.8993, "step": 10030 }, { "epoch": 0.30578270326167106, "grad_norm": 29.875, "learning_rate": 0.00016649617744381493, "loss": 2.8896, "step": 10040 }, { "epoch": 0.30608726770715083, "grad_norm": 32.25, "learning_rate": 0.00016642700942418663, "loss": 2.8877, "step": 10050 }, { "epoch": 0.30639183215263055, "grad_norm": 30.625, "learning_rate": 0.00016635778644647156, "loss": 2.8677, "step": 10060 }, { "epoch": 0.3066963965981103, "grad_norm": 32.75, "learning_rate": 0.0001662885085780907, "loss": 2.8712, "step": 10070 }, { "epoch": 0.3070009610435901, "grad_norm": 31.375, "learning_rate": 0.00016621917588651838, "loss": 2.8823, "step": 10080 }, { "epoch": 0.3073055254890698, "grad_norm": 30.875, "learning_rate": 0.00016614978843928251, "loss": 2.8969, "step": 10090 }, { "epoch": 0.3076100899345496, "grad_norm": 30.625, "learning_rate": 0.00016608034630396422, "loss": 2.8973, "step": 10100 }, { "epoch": 0.30791465438002935, "grad_norm": 30.375, "learning_rate": 0.00016601084954819779, "loss": 2.9046, "step": 10110 }, { "epoch": 0.30821921882550907, "grad_norm": 31.625, "learning_rate": 0.00016594129823967093, "loss": 2.8651, "step": 10120 }, { "epoch": 0.30852378327098884, "grad_norm": 31.375, "learning_rate": 0.00016587169244612434, "loss": 2.8934, "step": 10130 }, { "epoch": 0.3088283477164686, "grad_norm": 30.375, "learning_rate": 0.00016580203223535178, "loss": 2.896, "step": 10140 }, { "epoch": 0.3091329121619483, "grad_norm": 30.125, "learning_rate": 0.00016573231767520012, "loss": 2.8436, "step": 10150 }, { "epoch": 0.3094374766074281, "grad_norm": 32.25, "learning_rate": 0.00016566254883356902, "loss": 2.8996, "step": 10160 }, { "epoch": 0.30974204105290787, "grad_norm": 30.25, "learning_rate": 0.00016559272577841113, "loss": 2.8493, "step": 10170 }, { "epoch": 0.3100466054983876, "grad_norm": 33.5, "learning_rate": 0.00016552284857773185, "loss": 2.8752, "step": 10180 }, { "epoch": 0.31035116994386736, "grad_norm": 32.0, "learning_rate": 0.0001654529172995893, "loss": 2.8794, "step": 10190 }, { "epoch": 0.31065573438934707, "grad_norm": 31.625, "learning_rate": 0.0001653829320120943, "loss": 2.8906, "step": 10200 }, { "epoch": 0.31096029883482684, "grad_norm": 31.875, "learning_rate": 0.00016531289278341028, "loss": 2.8941, "step": 10210 }, { "epoch": 0.3112648632803066, "grad_norm": 31.25, "learning_rate": 0.0001652427996817531, "loss": 2.8749, "step": 10220 }, { "epoch": 0.31156942772578633, "grad_norm": 29.375, "learning_rate": 0.0001651726527753913, "loss": 2.8978, "step": 10230 }, { "epoch": 0.3118739921712661, "grad_norm": 30.25, "learning_rate": 0.00016510245213264562, "loss": 2.8678, "step": 10240 }, { "epoch": 0.3121785566167459, "grad_norm": 32.25, "learning_rate": 0.00016503219782188926, "loss": 2.8595, "step": 10250 }, { "epoch": 0.3124831210622256, "grad_norm": 30.75, "learning_rate": 0.00016496188991154765, "loss": 2.8441, "step": 10260 }, { "epoch": 0.31278768550770536, "grad_norm": 32.5, "learning_rate": 0.0001648915284700984, "loss": 2.9091, "step": 10270 }, { "epoch": 0.31309224995318513, "grad_norm": 30.875, "learning_rate": 0.00016482111356607134, "loss": 2.8937, "step": 10280 }, { "epoch": 0.31339681439866485, "grad_norm": 30.375, "learning_rate": 0.00016475064526804824, "loss": 2.879, "step": 10290 }, { "epoch": 0.3137013788441446, "grad_norm": 29.875, "learning_rate": 0.00016468012364466303, "loss": 2.8337, "step": 10300 }, { "epoch": 0.3140059432896244, "grad_norm": 33.25, "learning_rate": 0.0001646095487646014, "loss": 2.887, "step": 10310 }, { "epoch": 0.3143105077351041, "grad_norm": 31.125, "learning_rate": 0.00016453892069660111, "loss": 2.8763, "step": 10320 }, { "epoch": 0.3146150721805839, "grad_norm": 30.625, "learning_rate": 0.00016446823950945155, "loss": 2.8729, "step": 10330 }, { "epoch": 0.31491963662606365, "grad_norm": 30.625, "learning_rate": 0.00016439750527199394, "loss": 2.8221, "step": 10340 }, { "epoch": 0.31522420107154336, "grad_norm": 32.0, "learning_rate": 0.00016432671805312115, "loss": 2.8313, "step": 10350 }, { "epoch": 0.31552876551702314, "grad_norm": 30.5, "learning_rate": 0.00016425587792177763, "loss": 2.8712, "step": 10360 }, { "epoch": 0.3158333299625029, "grad_norm": 31.5, "learning_rate": 0.00016418498494695939, "loss": 2.8509, "step": 10370 }, { "epoch": 0.3161378944079826, "grad_norm": 31.875, "learning_rate": 0.00016411403919771388, "loss": 2.867, "step": 10380 }, { "epoch": 0.3164424588534624, "grad_norm": 30.25, "learning_rate": 0.00016404304074314004, "loss": 2.8944, "step": 10390 }, { "epoch": 0.31674702329894217, "grad_norm": 30.875, "learning_rate": 0.000163971989652388, "loss": 2.8107, "step": 10400 }, { "epoch": 0.3170515877444219, "grad_norm": 30.75, "learning_rate": 0.00016390088599465926, "loss": 2.8705, "step": 10410 }, { "epoch": 0.31735615218990165, "grad_norm": 31.875, "learning_rate": 0.00016382972983920648, "loss": 2.8807, "step": 10420 }, { "epoch": 0.31766071663538137, "grad_norm": 30.875, "learning_rate": 0.00016375852125533345, "loss": 2.8852, "step": 10430 }, { "epoch": 0.31796528108086114, "grad_norm": 30.75, "learning_rate": 0.00016368726031239507, "loss": 2.8913, "step": 10440 }, { "epoch": 0.3182698455263409, "grad_norm": 33.0, "learning_rate": 0.0001636159470797972, "loss": 2.877, "step": 10450 }, { "epoch": 0.3185744099718206, "grad_norm": 32.0, "learning_rate": 0.00016354458162699657, "loss": 2.8618, "step": 10460 }, { "epoch": 0.3188789744173004, "grad_norm": 32.75, "learning_rate": 0.0001634731640235009, "loss": 2.8611, "step": 10470 }, { "epoch": 0.31918353886278017, "grad_norm": 31.25, "learning_rate": 0.00016340169433886855, "loss": 2.8882, "step": 10480 }, { "epoch": 0.3194881033082599, "grad_norm": 30.125, "learning_rate": 0.00016333017264270875, "loss": 2.8599, "step": 10490 }, { "epoch": 0.31979266775373966, "grad_norm": 31.0, "learning_rate": 0.00016325859900468133, "loss": 2.8518, "step": 10500 }, { "epoch": 0.32009723219921943, "grad_norm": 30.625, "learning_rate": 0.00016318697349449666, "loss": 2.8847, "step": 10510 }, { "epoch": 0.32040179664469914, "grad_norm": 32.0, "learning_rate": 0.0001631152961819157, "loss": 2.9084, "step": 10520 }, { "epoch": 0.3207063610901789, "grad_norm": 32.5, "learning_rate": 0.00016304356713674984, "loss": 2.8823, "step": 10530 }, { "epoch": 0.3210109255356587, "grad_norm": 31.0, "learning_rate": 0.00016297178642886087, "loss": 2.8626, "step": 10540 }, { "epoch": 0.3213154899811384, "grad_norm": 30.25, "learning_rate": 0.00016289995412816082, "loss": 2.8461, "step": 10550 }, { "epoch": 0.3216200544266182, "grad_norm": 30.375, "learning_rate": 0.00016282807030461213, "loss": 2.8949, "step": 10560 }, { "epoch": 0.32192461887209795, "grad_norm": 31.0, "learning_rate": 0.0001627561350282273, "loss": 2.8792, "step": 10570 }, { "epoch": 0.32222918331757766, "grad_norm": 31.625, "learning_rate": 0.0001626841483690689, "loss": 2.8655, "step": 10580 }, { "epoch": 0.32253374776305743, "grad_norm": 31.75, "learning_rate": 0.0001626121103972497, "loss": 2.8527, "step": 10590 }, { "epoch": 0.3228383122085372, "grad_norm": 32.5, "learning_rate": 0.00016254002118293228, "loss": 2.8826, "step": 10600 }, { "epoch": 0.3231428766540169, "grad_norm": 30.875, "learning_rate": 0.00016246788079632925, "loss": 2.9123, "step": 10610 }, { "epoch": 0.3234474410994967, "grad_norm": 30.375, "learning_rate": 0.00016239568930770303, "loss": 2.8338, "step": 10620 }, { "epoch": 0.32375200554497646, "grad_norm": 33.0, "learning_rate": 0.00016232344678736578, "loss": 2.885, "step": 10630 }, { "epoch": 0.3240565699904562, "grad_norm": 30.625, "learning_rate": 0.0001622511533056794, "loss": 2.8982, "step": 10640 }, { "epoch": 0.32436113443593595, "grad_norm": 33.25, "learning_rate": 0.0001621788089330554, "loss": 2.8741, "step": 10650 }, { "epoch": 0.32466569888141567, "grad_norm": 30.5, "learning_rate": 0.0001621064137399549, "loss": 2.8528, "step": 10660 }, { "epoch": 0.32497026332689544, "grad_norm": 31.75, "learning_rate": 0.0001620339677968884, "loss": 2.8595, "step": 10670 }, { "epoch": 0.3252748277723752, "grad_norm": 31.375, "learning_rate": 0.000161961471174416, "loss": 2.8789, "step": 10680 }, { "epoch": 0.3255793922178549, "grad_norm": 31.0, "learning_rate": 0.00016188892394314699, "loss": 2.8547, "step": 10690 }, { "epoch": 0.3258839566633347, "grad_norm": 30.25, "learning_rate": 0.0001618163261737401, "loss": 2.8042, "step": 10700 }, { "epoch": 0.32618852110881447, "grad_norm": 30.5, "learning_rate": 0.00016174367793690318, "loss": 2.8664, "step": 10710 }, { "epoch": 0.3264930855542942, "grad_norm": 32.0, "learning_rate": 0.00016167097930339328, "loss": 2.8733, "step": 10720 }, { "epoch": 0.32679764999977395, "grad_norm": 29.375, "learning_rate": 0.00016159823034401653, "loss": 2.8765, "step": 10730 }, { "epoch": 0.3271022144452537, "grad_norm": 31.625, "learning_rate": 0.00016152543112962805, "loss": 2.8891, "step": 10740 }, { "epoch": 0.32740677889073344, "grad_norm": 31.625, "learning_rate": 0.00016145258173113193, "loss": 2.9043, "step": 10750 }, { "epoch": 0.3277113433362132, "grad_norm": 31.75, "learning_rate": 0.0001613796822194812, "loss": 2.8775, "step": 10760 }, { "epoch": 0.328015907781693, "grad_norm": 31.0, "learning_rate": 0.00016130673266567753, "loss": 2.8323, "step": 10770 }, { "epoch": 0.3283204722271727, "grad_norm": 33.25, "learning_rate": 0.00016123373314077145, "loss": 2.8819, "step": 10780 }, { "epoch": 0.32862503667265247, "grad_norm": 33.75, "learning_rate": 0.00016116068371586223, "loss": 2.8838, "step": 10790 }, { "epoch": 0.32892960111813224, "grad_norm": 31.875, "learning_rate": 0.00016108758446209756, "loss": 2.8856, "step": 10800 }, { "epoch": 0.32923416556361196, "grad_norm": 30.875, "learning_rate": 0.0001610144354506738, "loss": 2.8692, "step": 10810 }, { "epoch": 0.32953873000909173, "grad_norm": 30.875, "learning_rate": 0.00016094123675283568, "loss": 2.8679, "step": 10820 }, { "epoch": 0.3298432944545715, "grad_norm": 31.25, "learning_rate": 0.00016086798843987642, "loss": 2.8832, "step": 10830 }, { "epoch": 0.3301478589000512, "grad_norm": 31.75, "learning_rate": 0.00016079469058313751, "loss": 2.8664, "step": 10840 }, { "epoch": 0.330452423345531, "grad_norm": 33.25, "learning_rate": 0.0001607213432540087, "loss": 2.8785, "step": 10850 }, { "epoch": 0.33075698779101076, "grad_norm": 30.75, "learning_rate": 0.00016064794652392788, "loss": 2.8711, "step": 10860 }, { "epoch": 0.3310615522364905, "grad_norm": 30.875, "learning_rate": 0.00016057450046438114, "loss": 2.8601, "step": 10870 }, { "epoch": 0.33136611668197025, "grad_norm": 31.75, "learning_rate": 0.00016050100514690253, "loss": 2.8277, "step": 10880 }, { "epoch": 0.33167068112744996, "grad_norm": 29.75, "learning_rate": 0.00016042746064307418, "loss": 2.8775, "step": 10890 }, { "epoch": 0.33197524557292973, "grad_norm": 31.5, "learning_rate": 0.00016035386702452603, "loss": 2.8942, "step": 10900 }, { "epoch": 0.3322798100184095, "grad_norm": 30.125, "learning_rate": 0.00016028022436293587, "loss": 2.8594, "step": 10910 }, { "epoch": 0.3325843744638892, "grad_norm": 32.0, "learning_rate": 0.0001602065327300293, "loss": 2.8654, "step": 10920 }, { "epoch": 0.332888938909369, "grad_norm": 31.25, "learning_rate": 0.0001601327921975796, "loss": 2.8516, "step": 10930 }, { "epoch": 0.33319350335484876, "grad_norm": 30.375, "learning_rate": 0.00016005900283740766, "loss": 2.8784, "step": 10940 }, { "epoch": 0.3334980678003285, "grad_norm": 32.0, "learning_rate": 0.0001599851647213819, "loss": 2.8657, "step": 10950 }, { "epoch": 0.33380263224580825, "grad_norm": 36.0, "learning_rate": 0.00015991127792141832, "loss": 2.8807, "step": 10960 }, { "epoch": 0.334107196691288, "grad_norm": 31.125, "learning_rate": 0.0001598373425094803, "loss": 2.8882, "step": 10970 }, { "epoch": 0.33441176113676774, "grad_norm": 30.875, "learning_rate": 0.00015976335855757848, "loss": 2.8494, "step": 10980 }, { "epoch": 0.3347163255822475, "grad_norm": 32.5, "learning_rate": 0.00015968932613777086, "loss": 2.8389, "step": 10990 }, { "epoch": 0.3350208900277273, "grad_norm": 30.375, "learning_rate": 0.00015961524532216264, "loss": 2.8481, "step": 11000 }, { "epoch": 0.335325454473207, "grad_norm": 32.25, "learning_rate": 0.00015954111618290612, "loss": 2.8776, "step": 11010 }, { "epoch": 0.33563001891868677, "grad_norm": 32.75, "learning_rate": 0.00015946693879220072, "loss": 2.8298, "step": 11020 }, { "epoch": 0.33593458336416654, "grad_norm": 32.75, "learning_rate": 0.00015939271322229284, "loss": 2.8503, "step": 11030 }, { "epoch": 0.33623914780964625, "grad_norm": 29.875, "learning_rate": 0.0001593184395454757, "loss": 2.9072, "step": 11040 }, { "epoch": 0.336543712255126, "grad_norm": 31.25, "learning_rate": 0.00015924411783408958, "loss": 2.8739, "step": 11050 }, { "epoch": 0.3368482767006058, "grad_norm": 33.0, "learning_rate": 0.0001591697481605214, "loss": 2.8592, "step": 11060 }, { "epoch": 0.3371528411460855, "grad_norm": 31.625, "learning_rate": 0.00015909533059720474, "loss": 2.8638, "step": 11070 }, { "epoch": 0.3374574055915653, "grad_norm": 31.125, "learning_rate": 0.00015902086521662003, "loss": 2.8908, "step": 11080 }, { "epoch": 0.33776197003704506, "grad_norm": 31.875, "learning_rate": 0.00015894635209129406, "loss": 2.8349, "step": 11090 }, { "epoch": 0.33806653448252477, "grad_norm": 31.125, "learning_rate": 0.00015887179129380022, "loss": 2.8348, "step": 11100 }, { "epoch": 0.33837109892800454, "grad_norm": 31.0, "learning_rate": 0.00015879718289675836, "loss": 2.8498, "step": 11110 }, { "epoch": 0.33867566337348426, "grad_norm": 32.5, "learning_rate": 0.00015872252697283462, "loss": 2.88, "step": 11120 }, { "epoch": 0.33898022781896403, "grad_norm": 30.5, "learning_rate": 0.0001586478235947415, "loss": 2.8628, "step": 11130 }, { "epoch": 0.3392847922644438, "grad_norm": 30.25, "learning_rate": 0.00015857307283523758, "loss": 2.8385, "step": 11140 }, { "epoch": 0.3395893567099235, "grad_norm": 30.75, "learning_rate": 0.00015849827476712783, "loss": 2.8775, "step": 11150 }, { "epoch": 0.3398939211554033, "grad_norm": 32.5, "learning_rate": 0.00015842342946326306, "loss": 2.8822, "step": 11160 }, { "epoch": 0.34019848560088306, "grad_norm": 31.375, "learning_rate": 0.0001583485369965402, "loss": 2.8477, "step": 11170 }, { "epoch": 0.3405030500463628, "grad_norm": 30.875, "learning_rate": 0.00015827359743990208, "loss": 2.8837, "step": 11180 }, { "epoch": 0.34080761449184255, "grad_norm": 31.75, "learning_rate": 0.00015819861086633749, "loss": 2.899, "step": 11190 }, { "epoch": 0.3411121789373223, "grad_norm": 33.25, "learning_rate": 0.00015812357734888087, "loss": 2.8667, "step": 11200 }, { "epoch": 0.34141674338280203, "grad_norm": 30.75, "learning_rate": 0.0001580484969606124, "loss": 2.8532, "step": 11210 }, { "epoch": 0.3417213078282818, "grad_norm": 33.5, "learning_rate": 0.00015797336977465808, "loss": 2.8908, "step": 11220 }, { "epoch": 0.3420258722737616, "grad_norm": 30.625, "learning_rate": 0.00015789819586418925, "loss": 2.8639, "step": 11230 }, { "epoch": 0.3423304367192413, "grad_norm": 31.75, "learning_rate": 0.0001578229753024229, "loss": 2.8331, "step": 11240 }, { "epoch": 0.34263500116472106, "grad_norm": 30.125, "learning_rate": 0.00015774770816262144, "loss": 2.8714, "step": 11250 }, { "epoch": 0.34293956561020084, "grad_norm": 30.875, "learning_rate": 0.00015767239451809268, "loss": 2.8524, "step": 11260 }, { "epoch": 0.34324413005568055, "grad_norm": 30.75, "learning_rate": 0.00015759703444218962, "loss": 2.8413, "step": 11270 }, { "epoch": 0.3435486945011603, "grad_norm": 32.0, "learning_rate": 0.0001575216280083106, "loss": 2.8498, "step": 11280 }, { "epoch": 0.3438532589466401, "grad_norm": 30.5, "learning_rate": 0.00015744617528989897, "loss": 2.8411, "step": 11290 }, { "epoch": 0.3441578233921198, "grad_norm": 30.75, "learning_rate": 0.00015737067636044337, "loss": 2.8555, "step": 11300 }, { "epoch": 0.3444623878375996, "grad_norm": 31.5, "learning_rate": 0.00015729513129347718, "loss": 2.8689, "step": 11310 }, { "epoch": 0.34476695228307935, "grad_norm": 32.25, "learning_rate": 0.00015721954016257892, "loss": 2.8454, "step": 11320 }, { "epoch": 0.34507151672855907, "grad_norm": 31.375, "learning_rate": 0.00015714390304137196, "loss": 2.8715, "step": 11330 }, { "epoch": 0.34537608117403884, "grad_norm": 30.75, "learning_rate": 0.00015706822000352433, "loss": 2.905, "step": 11340 }, { "epoch": 0.34568064561951856, "grad_norm": 30.5, "learning_rate": 0.00015699249112274892, "loss": 2.8739, "step": 11350 }, { "epoch": 0.3459852100649983, "grad_norm": 32.0, "learning_rate": 0.0001569167164728032, "loss": 2.857, "step": 11360 }, { "epoch": 0.3462897745104781, "grad_norm": 29.875, "learning_rate": 0.00015684089612748924, "loss": 2.8334, "step": 11370 }, { "epoch": 0.3465943389559578, "grad_norm": 30.25, "learning_rate": 0.00015676503016065363, "loss": 2.8472, "step": 11380 }, { "epoch": 0.3468989034014376, "grad_norm": 32.25, "learning_rate": 0.00015668911864618736, "loss": 2.8411, "step": 11390 }, { "epoch": 0.34720346784691736, "grad_norm": 33.25, "learning_rate": 0.0001566131616580258, "loss": 2.8654, "step": 11400 }, { "epoch": 0.3475080322923971, "grad_norm": 31.75, "learning_rate": 0.00015653715927014855, "loss": 2.8159, "step": 11410 }, { "epoch": 0.34781259673787684, "grad_norm": 32.0, "learning_rate": 0.00015646111155657953, "loss": 2.8292, "step": 11420 }, { "epoch": 0.3481171611833566, "grad_norm": 31.125, "learning_rate": 0.0001563850185913868, "loss": 2.8449, "step": 11430 }, { "epoch": 0.34842172562883633, "grad_norm": 33.5, "learning_rate": 0.00015630888044868238, "loss": 2.8603, "step": 11440 }, { "epoch": 0.3487262900743161, "grad_norm": 31.75, "learning_rate": 0.00015623269720262242, "loss": 2.8614, "step": 11450 }, { "epoch": 0.3490308545197959, "grad_norm": 31.875, "learning_rate": 0.00015615646892740693, "loss": 2.8487, "step": 11460 }, { "epoch": 0.3493354189652756, "grad_norm": 31.375, "learning_rate": 0.00015608019569727973, "loss": 2.8607, "step": 11470 }, { "epoch": 0.34963998341075536, "grad_norm": 30.625, "learning_rate": 0.00015600387758652858, "loss": 2.8787, "step": 11480 }, { "epoch": 0.34994454785623513, "grad_norm": 31.625, "learning_rate": 0.00015592751466948483, "loss": 2.8757, "step": 11490 }, { "epoch": 0.35024911230171485, "grad_norm": 30.0, "learning_rate": 0.00015585110702052345, "loss": 2.8612, "step": 11500 }, { "epoch": 0.3505536767471946, "grad_norm": 31.375, "learning_rate": 0.0001557746547140631, "loss": 2.83, "step": 11510 }, { "epoch": 0.3508582411926744, "grad_norm": 31.125, "learning_rate": 0.00015569815782456577, "loss": 2.8453, "step": 11520 }, { "epoch": 0.3511628056381541, "grad_norm": 30.375, "learning_rate": 0.000155621616426537, "loss": 2.8495, "step": 11530 }, { "epoch": 0.3514673700836339, "grad_norm": 32.75, "learning_rate": 0.0001555450305945257, "loss": 2.8737, "step": 11540 }, { "epoch": 0.35177193452911365, "grad_norm": 32.0, "learning_rate": 0.00015546840040312393, "loss": 2.8594, "step": 11550 }, { "epoch": 0.35207649897459337, "grad_norm": 32.0, "learning_rate": 0.00015539172592696706, "loss": 2.8934, "step": 11560 }, { "epoch": 0.35238106342007314, "grad_norm": 31.0, "learning_rate": 0.00015531500724073354, "loss": 2.8646, "step": 11570 }, { "epoch": 0.35268562786555285, "grad_norm": 32.75, "learning_rate": 0.0001552382444191449, "loss": 2.8021, "step": 11580 }, { "epoch": 0.3529901923110326, "grad_norm": 32.75, "learning_rate": 0.00015516143753696563, "loss": 2.8523, "step": 11590 }, { "epoch": 0.3532947567565124, "grad_norm": 30.5, "learning_rate": 0.0001550845866690032, "loss": 2.8349, "step": 11600 }, { "epoch": 0.3535993212019921, "grad_norm": 30.375, "learning_rate": 0.00015500769189010787, "loss": 2.8418, "step": 11610 }, { "epoch": 0.3539038856474719, "grad_norm": 31.625, "learning_rate": 0.0001549307532751726, "loss": 2.8814, "step": 11620 }, { "epoch": 0.35420845009295165, "grad_norm": 30.25, "learning_rate": 0.0001548537708991332, "loss": 2.8608, "step": 11630 }, { "epoch": 0.35451301453843137, "grad_norm": 30.75, "learning_rate": 0.000154776744836968, "loss": 2.85, "step": 11640 }, { "epoch": 0.35481757898391114, "grad_norm": 31.0, "learning_rate": 0.0001546996751636979, "loss": 2.882, "step": 11650 }, { "epoch": 0.3551221434293909, "grad_norm": 30.125, "learning_rate": 0.00015462256195438625, "loss": 2.8208, "step": 11660 }, { "epoch": 0.35542670787487063, "grad_norm": 30.75, "learning_rate": 0.00015454540528413887, "loss": 2.8767, "step": 11670 }, { "epoch": 0.3557312723203504, "grad_norm": 32.5, "learning_rate": 0.00015446820522810382, "loss": 2.847, "step": 11680 }, { "epoch": 0.35603583676583017, "grad_norm": 30.0, "learning_rate": 0.00015439096186147154, "loss": 2.8482, "step": 11690 }, { "epoch": 0.3563404012113099, "grad_norm": 33.25, "learning_rate": 0.00015431367525947447, "loss": 2.8347, "step": 11700 }, { "epoch": 0.35664496565678966, "grad_norm": 29.5, "learning_rate": 0.00015423634549738735, "loss": 2.8529, "step": 11710 }, { "epoch": 0.35694953010226943, "grad_norm": 31.125, "learning_rate": 0.00015415897265052682, "loss": 2.8719, "step": 11720 }, { "epoch": 0.35725409454774915, "grad_norm": 31.75, "learning_rate": 0.0001540815567942516, "loss": 2.8557, "step": 11730 }, { "epoch": 0.3575586589932289, "grad_norm": 32.5, "learning_rate": 0.0001540040980039622, "loss": 2.8672, "step": 11740 }, { "epoch": 0.3578632234387087, "grad_norm": 31.75, "learning_rate": 0.00015392659635510094, "loss": 2.8426, "step": 11750 }, { "epoch": 0.3581677878841884, "grad_norm": 30.25, "learning_rate": 0.00015384905192315199, "loss": 2.8736, "step": 11760 }, { "epoch": 0.3584723523296682, "grad_norm": 30.625, "learning_rate": 0.00015377146478364113, "loss": 2.8507, "step": 11770 }, { "epoch": 0.35877691677514795, "grad_norm": 32.0, "learning_rate": 0.0001536938350121357, "loss": 2.8781, "step": 11780 }, { "epoch": 0.35908148122062766, "grad_norm": 30.875, "learning_rate": 0.00015361616268424458, "loss": 2.8452, "step": 11790 }, { "epoch": 0.35938604566610743, "grad_norm": 30.875, "learning_rate": 0.00015353844787561812, "loss": 2.845, "step": 11800 }, { "epoch": 0.3596906101115872, "grad_norm": 31.5, "learning_rate": 0.00015346069066194805, "loss": 2.8303, "step": 11810 }, { "epoch": 0.3599951745570669, "grad_norm": 31.125, "learning_rate": 0.0001533828911189674, "loss": 2.8475, "step": 11820 }, { "epoch": 0.3602997390025467, "grad_norm": 30.0, "learning_rate": 0.0001533050493224504, "loss": 2.8266, "step": 11830 }, { "epoch": 0.3606043034480264, "grad_norm": 30.25, "learning_rate": 0.00015322716534821242, "loss": 2.8848, "step": 11840 }, { "epoch": 0.3609088678935062, "grad_norm": 33.5, "learning_rate": 0.00015314923927211, "loss": 2.843, "step": 11850 }, { "epoch": 0.36121343233898595, "grad_norm": 30.25, "learning_rate": 0.00015307127117004056, "loss": 2.8228, "step": 11860 }, { "epoch": 0.36151799678446567, "grad_norm": 31.5, "learning_rate": 0.00015299326111794257, "loss": 2.8501, "step": 11870 }, { "epoch": 0.36182256122994544, "grad_norm": 30.0, "learning_rate": 0.00015291520919179528, "loss": 2.8339, "step": 11880 }, { "epoch": 0.3621271256754252, "grad_norm": 30.375, "learning_rate": 0.0001528371154676187, "loss": 2.823, "step": 11890 }, { "epoch": 0.3624316901209049, "grad_norm": 30.375, "learning_rate": 0.00015275898002147369, "loss": 2.8643, "step": 11900 }, { "epoch": 0.3627362545663847, "grad_norm": 30.75, "learning_rate": 0.00015268080292946156, "loss": 2.8478, "step": 11910 }, { "epoch": 0.36304081901186447, "grad_norm": 30.875, "learning_rate": 0.00015260258426772437, "loss": 2.8497, "step": 11920 }, { "epoch": 0.3633453834573442, "grad_norm": 33.0, "learning_rate": 0.00015252432411244453, "loss": 2.8376, "step": 11930 }, { "epoch": 0.36364994790282396, "grad_norm": 30.125, "learning_rate": 0.0001524460225398449, "loss": 2.8321, "step": 11940 }, { "epoch": 0.3639545123483037, "grad_norm": 30.0, "learning_rate": 0.00015236767962618865, "loss": 2.8296, "step": 11950 }, { "epoch": 0.36425907679378344, "grad_norm": 30.875, "learning_rate": 0.0001522892954477793, "loss": 2.873, "step": 11960 }, { "epoch": 0.3645636412392632, "grad_norm": 34.5, "learning_rate": 0.00015221087008096055, "loss": 2.867, "step": 11970 }, { "epoch": 0.364868205684743, "grad_norm": 31.125, "learning_rate": 0.0001521324036021161, "loss": 2.8575, "step": 11980 }, { "epoch": 0.3651727701302227, "grad_norm": 34.5, "learning_rate": 0.0001520538960876698, "loss": 2.815, "step": 11990 }, { "epoch": 0.3654773345757025, "grad_norm": 32.25, "learning_rate": 0.00015197534761408543, "loss": 2.8239, "step": 12000 }, { "epoch": 0.36578189902118224, "grad_norm": 32.25, "learning_rate": 0.00015189675825786668, "loss": 2.8452, "step": 12010 }, { "epoch": 0.36608646346666196, "grad_norm": 31.5, "learning_rate": 0.00015181812809555702, "loss": 2.8384, "step": 12020 }, { "epoch": 0.36639102791214173, "grad_norm": 30.875, "learning_rate": 0.00015173945720373973, "loss": 2.8046, "step": 12030 }, { "epoch": 0.3666955923576215, "grad_norm": 30.0, "learning_rate": 0.00015166074565903768, "loss": 2.8288, "step": 12040 }, { "epoch": 0.3670001568031012, "grad_norm": 31.5, "learning_rate": 0.00015158199353811343, "loss": 2.8327, "step": 12050 }, { "epoch": 0.367304721248581, "grad_norm": 31.125, "learning_rate": 0.00015150320091766894, "loss": 2.8306, "step": 12060 }, { "epoch": 0.3676092856940607, "grad_norm": 30.75, "learning_rate": 0.00015142436787444571, "loss": 2.8344, "step": 12070 }, { "epoch": 0.3679138501395405, "grad_norm": 29.875, "learning_rate": 0.00015134549448522463, "loss": 2.8393, "step": 12080 }, { "epoch": 0.36821841458502025, "grad_norm": 30.5, "learning_rate": 0.00015126658082682573, "loss": 2.8376, "step": 12090 }, { "epoch": 0.36852297903049996, "grad_norm": 31.125, "learning_rate": 0.00015118762697610846, "loss": 2.8183, "step": 12100 }, { "epoch": 0.36882754347597974, "grad_norm": 31.125, "learning_rate": 0.0001511086330099713, "loss": 2.8553, "step": 12110 }, { "epoch": 0.3691321079214595, "grad_norm": 33.75, "learning_rate": 0.00015102959900535176, "loss": 2.8183, "step": 12120 }, { "epoch": 0.3694366723669392, "grad_norm": 30.5, "learning_rate": 0.00015095052503922652, "loss": 2.8579, "step": 12130 }, { "epoch": 0.369741236812419, "grad_norm": 31.25, "learning_rate": 0.00015087141118861098, "loss": 2.8395, "step": 12140 }, { "epoch": 0.37004580125789877, "grad_norm": 32.75, "learning_rate": 0.00015079225753055956, "loss": 2.8628, "step": 12150 }, { "epoch": 0.3703503657033785, "grad_norm": 32.25, "learning_rate": 0.00015071306414216534, "loss": 2.8198, "step": 12160 }, { "epoch": 0.37065493014885825, "grad_norm": 33.25, "learning_rate": 0.00015063383110056004, "loss": 2.8254, "step": 12170 }, { "epoch": 0.370959494594338, "grad_norm": 29.875, "learning_rate": 0.00015055455848291422, "loss": 2.8393, "step": 12180 }, { "epoch": 0.37126405903981774, "grad_norm": 31.625, "learning_rate": 0.00015047524636643676, "loss": 2.8401, "step": 12190 }, { "epoch": 0.3715686234852975, "grad_norm": 32.25, "learning_rate": 0.00015039589482837513, "loss": 2.8616, "step": 12200 }, { "epoch": 0.3718731879307773, "grad_norm": 31.5, "learning_rate": 0.00015031650394601518, "loss": 2.8563, "step": 12210 }, { "epoch": 0.372177752376257, "grad_norm": 30.125, "learning_rate": 0.00015023707379668104, "loss": 2.8155, "step": 12220 }, { "epoch": 0.37248231682173677, "grad_norm": 30.875, "learning_rate": 0.00015015760445773516, "loss": 2.8363, "step": 12230 }, { "epoch": 0.37278688126721654, "grad_norm": 30.875, "learning_rate": 0.00015007809600657806, "loss": 2.818, "step": 12240 }, { "epoch": 0.37309144571269626, "grad_norm": 30.75, "learning_rate": 0.00014999854852064843, "loss": 2.8278, "step": 12250 }, { "epoch": 0.37339601015817603, "grad_norm": 30.875, "learning_rate": 0.00014991896207742296, "loss": 2.8343, "step": 12260 }, { "epoch": 0.3737005746036558, "grad_norm": 31.875, "learning_rate": 0.0001498393367544163, "loss": 2.8299, "step": 12270 }, { "epoch": 0.3740051390491355, "grad_norm": 30.25, "learning_rate": 0.00014975967262918086, "loss": 2.846, "step": 12280 }, { "epoch": 0.3743097034946153, "grad_norm": 31.375, "learning_rate": 0.00014967996977930702, "loss": 2.8666, "step": 12290 }, { "epoch": 0.374614267940095, "grad_norm": 31.375, "learning_rate": 0.00014960022828242278, "loss": 2.8304, "step": 12300 }, { "epoch": 0.3749188323855748, "grad_norm": 30.5, "learning_rate": 0.0001495204482161937, "loss": 2.8194, "step": 12310 }, { "epoch": 0.37522339683105455, "grad_norm": 31.125, "learning_rate": 0.00014944062965832307, "loss": 2.8433, "step": 12320 }, { "epoch": 0.37552796127653426, "grad_norm": 31.125, "learning_rate": 0.00014936077268655156, "loss": 2.8189, "step": 12330 }, { "epoch": 0.37583252572201403, "grad_norm": 30.5, "learning_rate": 0.00014928087737865734, "loss": 2.8722, "step": 12340 }, { "epoch": 0.3761370901674938, "grad_norm": 31.75, "learning_rate": 0.00014920094381245576, "loss": 2.8554, "step": 12350 }, { "epoch": 0.3764416546129735, "grad_norm": 31.875, "learning_rate": 0.00014912097206579958, "loss": 2.8584, "step": 12360 }, { "epoch": 0.3767462190584533, "grad_norm": 32.25, "learning_rate": 0.00014904096221657876, "loss": 2.839, "step": 12370 }, { "epoch": 0.37705078350393306, "grad_norm": 31.0, "learning_rate": 0.00014896091434272023, "loss": 2.8378, "step": 12380 }, { "epoch": 0.3773553479494128, "grad_norm": 31.625, "learning_rate": 0.0001488808285221881, "loss": 2.8717, "step": 12390 }, { "epoch": 0.37765991239489255, "grad_norm": 30.0, "learning_rate": 0.0001488007048329833, "loss": 2.8149, "step": 12400 }, { "epoch": 0.3779644768403723, "grad_norm": 30.375, "learning_rate": 0.00014872054335314378, "loss": 2.8808, "step": 12410 }, { "epoch": 0.37826904128585204, "grad_norm": 32.5, "learning_rate": 0.00014864034416074424, "loss": 2.8534, "step": 12420 }, { "epoch": 0.3785736057313318, "grad_norm": 30.5, "learning_rate": 0.00014856010733389603, "loss": 2.8486, "step": 12430 }, { "epoch": 0.3788781701768116, "grad_norm": 31.625, "learning_rate": 0.00014847983295074736, "loss": 2.8235, "step": 12440 }, { "epoch": 0.3791827346222913, "grad_norm": 31.25, "learning_rate": 0.00014839952108948275, "loss": 2.8418, "step": 12450 }, { "epoch": 0.37948729906777107, "grad_norm": 31.125, "learning_rate": 0.00014831917182832343, "loss": 2.8302, "step": 12460 }, { "epoch": 0.37979186351325084, "grad_norm": 32.0, "learning_rate": 0.00014823878524552702, "loss": 2.8516, "step": 12470 }, { "epoch": 0.38009642795873055, "grad_norm": 31.875, "learning_rate": 0.0001481583614193874, "loss": 2.8403, "step": 12480 }, { "epoch": 0.3804009924042103, "grad_norm": 33.75, "learning_rate": 0.0001480779004282348, "loss": 2.7925, "step": 12490 }, { "epoch": 0.3807055568496901, "grad_norm": 31.375, "learning_rate": 0.00014799740235043566, "loss": 2.8486, "step": 12500 }, { "epoch": 0.3810101212951698, "grad_norm": 30.625, "learning_rate": 0.00014791686726439243, "loss": 2.8265, "step": 12510 }, { "epoch": 0.3813146857406496, "grad_norm": 31.125, "learning_rate": 0.00014783629524854378, "loss": 2.8145, "step": 12520 }, { "epoch": 0.3816192501861293, "grad_norm": 30.25, "learning_rate": 0.00014775568638136425, "loss": 2.8436, "step": 12530 }, { "epoch": 0.38192381463160907, "grad_norm": 31.125, "learning_rate": 0.0001476750407413642, "loss": 2.8119, "step": 12540 }, { "epoch": 0.38222837907708884, "grad_norm": 31.25, "learning_rate": 0.00014759435840708992, "loss": 2.8322, "step": 12550 }, { "epoch": 0.38253294352256856, "grad_norm": 30.25, "learning_rate": 0.00014751363945712337, "loss": 2.8226, "step": 12560 }, { "epoch": 0.38283750796804833, "grad_norm": 31.0, "learning_rate": 0.0001474328839700823, "loss": 2.8262, "step": 12570 }, { "epoch": 0.3831420724135281, "grad_norm": 30.625, "learning_rate": 0.00014735209202461984, "loss": 2.8646, "step": 12580 }, { "epoch": 0.3834466368590078, "grad_norm": 30.875, "learning_rate": 0.0001472712636994248, "loss": 2.804, "step": 12590 }, { "epoch": 0.3837512013044876, "grad_norm": 30.75, "learning_rate": 0.00014719039907322133, "loss": 2.8486, "step": 12600 }, { "epoch": 0.38405576574996736, "grad_norm": 32.0, "learning_rate": 0.00014710949822476898, "loss": 2.8254, "step": 12610 }, { "epoch": 0.3843603301954471, "grad_norm": 33.25, "learning_rate": 0.0001470285612328626, "loss": 2.8375, "step": 12620 }, { "epoch": 0.38466489464092685, "grad_norm": 33.25, "learning_rate": 0.00014694758817633216, "loss": 2.8131, "step": 12630 }, { "epoch": 0.3849694590864066, "grad_norm": 29.625, "learning_rate": 0.00014686657913404283, "loss": 2.851, "step": 12640 }, { "epoch": 0.38527402353188633, "grad_norm": 32.5, "learning_rate": 0.00014678553418489478, "loss": 2.8656, "step": 12650 }, { "epoch": 0.3855785879773661, "grad_norm": 33.5, "learning_rate": 0.00014670445340782316, "loss": 2.8222, "step": 12660 }, { "epoch": 0.3858831524228459, "grad_norm": 31.25, "learning_rate": 0.00014662333688179814, "loss": 2.8484, "step": 12670 }, { "epoch": 0.3861877168683256, "grad_norm": 30.25, "learning_rate": 0.00014654218468582446, "loss": 2.824, "step": 12680 }, { "epoch": 0.38649228131380536, "grad_norm": 31.0, "learning_rate": 0.00014646099689894185, "loss": 2.8755, "step": 12690 }, { "epoch": 0.38679684575928514, "grad_norm": 31.25, "learning_rate": 0.0001463797736002245, "loss": 2.8158, "step": 12700 }, { "epoch": 0.38710141020476485, "grad_norm": 30.125, "learning_rate": 0.00014629851486878135, "loss": 2.844, "step": 12710 }, { "epoch": 0.3874059746502446, "grad_norm": 29.875, "learning_rate": 0.00014621722078375578, "loss": 2.821, "step": 12720 }, { "epoch": 0.3877105390957244, "grad_norm": 31.125, "learning_rate": 0.00014613589142432556, "loss": 2.8209, "step": 12730 }, { "epoch": 0.3880151035412041, "grad_norm": 31.0, "learning_rate": 0.0001460545268697029, "loss": 2.8306, "step": 12740 }, { "epoch": 0.3883196679866839, "grad_norm": 32.25, "learning_rate": 0.00014597312719913423, "loss": 2.8466, "step": 12750 }, { "epoch": 0.3886242324321636, "grad_norm": 30.75, "learning_rate": 0.00014589169249190024, "loss": 2.8023, "step": 12760 }, { "epoch": 0.38892879687764337, "grad_norm": 32.75, "learning_rate": 0.0001458102228273157, "loss": 2.8353, "step": 12770 }, { "epoch": 0.38923336132312314, "grad_norm": 31.375, "learning_rate": 0.00014572871828472938, "loss": 2.803, "step": 12780 }, { "epoch": 0.38953792576860286, "grad_norm": 31.375, "learning_rate": 0.00014564717894352414, "loss": 2.8408, "step": 12790 }, { "epoch": 0.3898424902140826, "grad_norm": 31.625, "learning_rate": 0.0001455656048831166, "loss": 2.8256, "step": 12800 }, { "epoch": 0.3901470546595624, "grad_norm": 31.25, "learning_rate": 0.00014548399618295735, "loss": 2.8293, "step": 12810 }, { "epoch": 0.3904516191050421, "grad_norm": 32.75, "learning_rate": 0.00014540235292253057, "loss": 2.8361, "step": 12820 }, { "epoch": 0.3907561835505219, "grad_norm": 30.875, "learning_rate": 0.00014532067518135418, "loss": 2.812, "step": 12830 }, { "epoch": 0.39106074799600166, "grad_norm": 31.5, "learning_rate": 0.00014523896303897962, "loss": 2.7982, "step": 12840 }, { "epoch": 0.39136531244148137, "grad_norm": 30.875, "learning_rate": 0.00014515721657499197, "loss": 2.8435, "step": 12850 }, { "epoch": 0.39166987688696114, "grad_norm": 31.125, "learning_rate": 0.00014507543586900963, "loss": 2.8069, "step": 12860 }, { "epoch": 0.3919744413324409, "grad_norm": 30.5, "learning_rate": 0.00014499362100068428, "loss": 2.8428, "step": 12870 }, { "epoch": 0.39227900577792063, "grad_norm": 31.75, "learning_rate": 0.00014491177204970103, "loss": 2.8181, "step": 12880 }, { "epoch": 0.3925835702234004, "grad_norm": 30.875, "learning_rate": 0.00014482988909577814, "loss": 2.7989, "step": 12890 }, { "epoch": 0.3928881346688802, "grad_norm": 31.875, "learning_rate": 0.0001447479722186669, "loss": 2.8372, "step": 12900 }, { "epoch": 0.3931926991143599, "grad_norm": 32.5, "learning_rate": 0.00014466602149815176, "loss": 2.8452, "step": 12910 }, { "epoch": 0.39349726355983966, "grad_norm": 32.25, "learning_rate": 0.00014458403701405007, "loss": 2.8486, "step": 12920 }, { "epoch": 0.39380182800531943, "grad_norm": 31.875, "learning_rate": 0.00014450201884621205, "loss": 2.7958, "step": 12930 }, { "epoch": 0.39410639245079915, "grad_norm": 31.0, "learning_rate": 0.00014441996707452074, "loss": 2.8178, "step": 12940 }, { "epoch": 0.3944109568962789, "grad_norm": 30.625, "learning_rate": 0.000144337881778892, "loss": 2.8781, "step": 12950 }, { "epoch": 0.3947155213417587, "grad_norm": 31.125, "learning_rate": 0.00014425576303927416, "loss": 2.8491, "step": 12960 }, { "epoch": 0.3950200857872384, "grad_norm": 32.5, "learning_rate": 0.00014417361093564828, "loss": 2.8481, "step": 12970 }, { "epoch": 0.3953246502327182, "grad_norm": 34.25, "learning_rate": 0.0001440914255480278, "loss": 2.8408, "step": 12980 }, { "epoch": 0.3956292146781979, "grad_norm": 32.5, "learning_rate": 0.0001440092069564587, "loss": 2.824, "step": 12990 }, { "epoch": 0.39593377912367766, "grad_norm": 32.25, "learning_rate": 0.0001439269552410192, "loss": 2.8544, "step": 13000 }, { "epoch": 0.39623834356915744, "grad_norm": 30.625, "learning_rate": 0.0001438446704818199, "loss": 2.8213, "step": 13010 }, { "epoch": 0.39654290801463715, "grad_norm": 32.0, "learning_rate": 0.00014376235275900335, "loss": 2.8185, "step": 13020 }, { "epoch": 0.3968474724601169, "grad_norm": 31.125, "learning_rate": 0.00014368000215274443, "loss": 2.8275, "step": 13030 }, { "epoch": 0.3971520369055967, "grad_norm": 30.0, "learning_rate": 0.00014359761874325005, "loss": 2.8281, "step": 13040 }, { "epoch": 0.3974566013510764, "grad_norm": 33.0, "learning_rate": 0.0001435152026107589, "loss": 2.8311, "step": 13050 }, { "epoch": 0.3977611657965562, "grad_norm": 33.75, "learning_rate": 0.00014343275383554163, "loss": 2.865, "step": 13060 }, { "epoch": 0.39806573024203595, "grad_norm": 31.25, "learning_rate": 0.00014335027249790075, "loss": 2.8088, "step": 13070 }, { "epoch": 0.39837029468751567, "grad_norm": 32.5, "learning_rate": 0.00014326775867817037, "loss": 2.8685, "step": 13080 }, { "epoch": 0.39867485913299544, "grad_norm": 29.375, "learning_rate": 0.00014318521245671633, "loss": 2.8151, "step": 13090 }, { "epoch": 0.3989794235784752, "grad_norm": 30.125, "learning_rate": 0.00014310263391393592, "loss": 2.8554, "step": 13100 }, { "epoch": 0.3992839880239549, "grad_norm": 31.125, "learning_rate": 0.000143020023130258, "loss": 2.8515, "step": 13110 }, { "epoch": 0.3995885524694347, "grad_norm": 30.25, "learning_rate": 0.0001429373801861429, "loss": 2.7975, "step": 13120 }, { "epoch": 0.39989311691491447, "grad_norm": 31.5, "learning_rate": 0.00014285470516208206, "loss": 2.8217, "step": 13130 }, { "epoch": 0.4001976813603942, "grad_norm": 30.75, "learning_rate": 0.00014277199813859836, "loss": 2.8291, "step": 13140 }, { "epoch": 0.40050224580587396, "grad_norm": 31.75, "learning_rate": 0.00014268925919624573, "loss": 2.8135, "step": 13150 }, { "epoch": 0.40080681025135373, "grad_norm": 30.25, "learning_rate": 0.0001426064884156093, "loss": 2.859, "step": 13160 }, { "epoch": 0.40111137469683344, "grad_norm": 31.625, "learning_rate": 0.00014252368587730501, "loss": 2.8302, "step": 13170 }, { "epoch": 0.4014159391423132, "grad_norm": 32.0, "learning_rate": 0.00014244085166197998, "loss": 2.7867, "step": 13180 }, { "epoch": 0.401720503587793, "grad_norm": 31.125, "learning_rate": 0.00014235798585031202, "loss": 2.8451, "step": 13190 }, { "epoch": 0.4020250680332727, "grad_norm": 30.75, "learning_rate": 0.00014227508852300975, "loss": 2.7998, "step": 13200 }, { "epoch": 0.4023296324787525, "grad_norm": 31.0, "learning_rate": 0.0001421921597608125, "loss": 2.8183, "step": 13210 }, { "epoch": 0.4026341969242322, "grad_norm": 31.375, "learning_rate": 0.00014210919964449022, "loss": 2.8617, "step": 13220 }, { "epoch": 0.40293876136971196, "grad_norm": 30.625, "learning_rate": 0.00014202620825484337, "loss": 2.8415, "step": 13230 }, { "epoch": 0.40324332581519173, "grad_norm": 31.5, "learning_rate": 0.0001419431856727029, "loss": 2.8429, "step": 13240 }, { "epoch": 0.40354789026067145, "grad_norm": 31.25, "learning_rate": 0.00014186013197893012, "loss": 2.8158, "step": 13250 }, { "epoch": 0.4038524547061512, "grad_norm": 30.875, "learning_rate": 0.00014177704725441666, "loss": 2.8242, "step": 13260 }, { "epoch": 0.404157019151631, "grad_norm": 33.25, "learning_rate": 0.00014169393158008431, "loss": 2.8323, "step": 13270 }, { "epoch": 0.4044615835971107, "grad_norm": 33.75, "learning_rate": 0.00014161078503688516, "loss": 2.799, "step": 13280 }, { "epoch": 0.4047661480425905, "grad_norm": 30.0, "learning_rate": 0.00014152760770580117, "loss": 2.8357, "step": 13290 }, { "epoch": 0.40507071248807025, "grad_norm": 30.875, "learning_rate": 0.00014144439966784438, "loss": 2.8781, "step": 13300 }, { "epoch": 0.40537527693354997, "grad_norm": 31.625, "learning_rate": 0.00014136116100405674, "loss": 2.8234, "step": 13310 }, { "epoch": 0.40567984137902974, "grad_norm": 31.75, "learning_rate": 0.0001412778917955101, "loss": 2.8295, "step": 13320 }, { "epoch": 0.4059844058245095, "grad_norm": 31.75, "learning_rate": 0.00014119459212330588, "loss": 2.8431, "step": 13330 }, { "epoch": 0.4062889702699892, "grad_norm": 31.5, "learning_rate": 0.00014111126206857533, "loss": 2.7752, "step": 13340 }, { "epoch": 0.406593534715469, "grad_norm": 29.875, "learning_rate": 0.0001410279017124792, "loss": 2.8241, "step": 13350 }, { "epoch": 0.40689809916094877, "grad_norm": 30.5, "learning_rate": 0.00014094451113620785, "loss": 2.8098, "step": 13360 }, { "epoch": 0.4072026636064285, "grad_norm": 31.0, "learning_rate": 0.00014086109042098097, "loss": 2.8017, "step": 13370 }, { "epoch": 0.40750722805190825, "grad_norm": 31.125, "learning_rate": 0.00014077763964804763, "loss": 2.8308, "step": 13380 }, { "epoch": 0.407811792497388, "grad_norm": 31.875, "learning_rate": 0.0001406941588986862, "loss": 2.8605, "step": 13390 }, { "epoch": 0.40811635694286774, "grad_norm": 30.25, "learning_rate": 0.00014061064825420422, "loss": 2.8135, "step": 13400 }, { "epoch": 0.4084209213883475, "grad_norm": 32.25, "learning_rate": 0.00014052710779593843, "loss": 2.8261, "step": 13410 }, { "epoch": 0.4087254858338273, "grad_norm": 31.5, "learning_rate": 0.00014044353760525447, "loss": 2.8047, "step": 13420 }, { "epoch": 0.409030050279307, "grad_norm": 30.625, "learning_rate": 0.00014035993776354707, "loss": 2.8126, "step": 13430 }, { "epoch": 0.40933461472478677, "grad_norm": 32.0, "learning_rate": 0.0001402763083522397, "loss": 2.8045, "step": 13440 }, { "epoch": 0.40963917917026654, "grad_norm": 32.75, "learning_rate": 0.0001401926494527848, "loss": 2.8536, "step": 13450 }, { "epoch": 0.40994374361574626, "grad_norm": 31.875, "learning_rate": 0.00014010896114666335, "loss": 2.8048, "step": 13460 }, { "epoch": 0.41024830806122603, "grad_norm": 33.5, "learning_rate": 0.00014002524351538516, "loss": 2.8448, "step": 13470 }, { "epoch": 0.41055287250670575, "grad_norm": 30.625, "learning_rate": 0.0001399414966404884, "loss": 2.8654, "step": 13480 }, { "epoch": 0.4108574369521855, "grad_norm": 32.5, "learning_rate": 0.00013985772060353988, "loss": 2.807, "step": 13490 }, { "epoch": 0.4111620013976653, "grad_norm": 32.25, "learning_rate": 0.00013977391548613473, "loss": 2.8117, "step": 13500 }, { "epoch": 0.411466565843145, "grad_norm": 31.0, "learning_rate": 0.00013969008136989648, "loss": 2.8498, "step": 13510 }, { "epoch": 0.4117711302886248, "grad_norm": 31.0, "learning_rate": 0.00013960621833647682, "loss": 2.8288, "step": 13520 }, { "epoch": 0.41207569473410455, "grad_norm": 33.5, "learning_rate": 0.00013952232646755562, "loss": 2.8369, "step": 13530 }, { "epoch": 0.41238025917958426, "grad_norm": 31.0, "learning_rate": 0.0001394384058448409, "loss": 2.8212, "step": 13540 }, { "epoch": 0.41268482362506403, "grad_norm": 30.75, "learning_rate": 0.0001393544565500686, "loss": 2.8079, "step": 13550 }, { "epoch": 0.4129893880705438, "grad_norm": 31.5, "learning_rate": 0.00013927047866500264, "loss": 2.8025, "step": 13560 }, { "epoch": 0.4132939525160235, "grad_norm": 31.0, "learning_rate": 0.0001391864722714348, "loss": 2.7787, "step": 13570 }, { "epoch": 0.4135985169615033, "grad_norm": 30.5, "learning_rate": 0.0001391024374511845, "loss": 2.8436, "step": 13580 }, { "epoch": 0.41390308140698306, "grad_norm": 31.375, "learning_rate": 0.00013901837428609908, "loss": 2.8578, "step": 13590 }, { "epoch": 0.4142076458524628, "grad_norm": 30.0, "learning_rate": 0.00013893428285805326, "loss": 2.8161, "step": 13600 }, { "epoch": 0.41451221029794255, "grad_norm": 32.0, "learning_rate": 0.00013885016324894934, "loss": 2.8098, "step": 13610 }, { "epoch": 0.4148167747434223, "grad_norm": 33.0, "learning_rate": 0.00013876601554071725, "loss": 2.8032, "step": 13620 }, { "epoch": 0.41512133918890204, "grad_norm": 30.875, "learning_rate": 0.000138681839815314, "loss": 2.8219, "step": 13630 }, { "epoch": 0.4154259036343818, "grad_norm": 32.75, "learning_rate": 0.00013859763615472416, "loss": 2.8207, "step": 13640 }, { "epoch": 0.4157304680798616, "grad_norm": 31.5, "learning_rate": 0.00013851340464095924, "loss": 2.8014, "step": 13650 }, { "epoch": 0.4160350325253413, "grad_norm": 31.25, "learning_rate": 0.00013842914535605813, "loss": 2.8446, "step": 13660 }, { "epoch": 0.41633959697082107, "grad_norm": 31.375, "learning_rate": 0.0001383448583820866, "loss": 2.794, "step": 13670 }, { "epoch": 0.41664416141630084, "grad_norm": 30.25, "learning_rate": 0.00013826054380113742, "loss": 2.81, "step": 13680 }, { "epoch": 0.41694872586178056, "grad_norm": 31.75, "learning_rate": 0.00013817620169533033, "loss": 2.8156, "step": 13690 }, { "epoch": 0.4172532903072603, "grad_norm": 30.625, "learning_rate": 0.00013809183214681182, "loss": 2.8254, "step": 13700 }, { "epoch": 0.41755785475274004, "grad_norm": 31.25, "learning_rate": 0.00013800743523775507, "loss": 2.8503, "step": 13710 }, { "epoch": 0.4178624191982198, "grad_norm": 31.0, "learning_rate": 0.00013792301105035993, "loss": 2.82, "step": 13720 }, { "epoch": 0.4181669836436996, "grad_norm": 30.75, "learning_rate": 0.0001378385596668529, "loss": 2.7911, "step": 13730 }, { "epoch": 0.4184715480891793, "grad_norm": 29.375, "learning_rate": 0.00013775408116948685, "loss": 2.7774, "step": 13740 }, { "epoch": 0.4187761125346591, "grad_norm": 31.375, "learning_rate": 0.00013766957564054116, "loss": 2.8319, "step": 13750 }, { "epoch": 0.41908067698013884, "grad_norm": 32.75, "learning_rate": 0.00013758504316232142, "loss": 2.8242, "step": 13760 }, { "epoch": 0.41938524142561856, "grad_norm": 32.25, "learning_rate": 0.0001375004838171596, "loss": 2.8227, "step": 13770 }, { "epoch": 0.41968980587109833, "grad_norm": 31.125, "learning_rate": 0.00013741589768741374, "loss": 2.8372, "step": 13780 }, { "epoch": 0.4199943703165781, "grad_norm": 31.875, "learning_rate": 0.000137331284855468, "loss": 2.8051, "step": 13790 }, { "epoch": 0.4202989347620578, "grad_norm": 32.25, "learning_rate": 0.00013724664540373257, "loss": 2.8214, "step": 13800 }, { "epoch": 0.4206034992075376, "grad_norm": 33.5, "learning_rate": 0.00013716197941464355, "loss": 2.8329, "step": 13810 }, { "epoch": 0.42090806365301736, "grad_norm": 31.5, "learning_rate": 0.00013707728697066285, "loss": 2.8034, "step": 13820 }, { "epoch": 0.4212126280984971, "grad_norm": 31.375, "learning_rate": 0.0001369925681542782, "loss": 2.7835, "step": 13830 }, { "epoch": 0.42151719254397685, "grad_norm": 30.875, "learning_rate": 0.00013690782304800298, "loss": 2.8345, "step": 13840 }, { "epoch": 0.4218217569894566, "grad_norm": 31.125, "learning_rate": 0.00013682305173437622, "loss": 2.8079, "step": 13850 }, { "epoch": 0.42212632143493634, "grad_norm": 32.0, "learning_rate": 0.0001367382542959624, "loss": 2.8395, "step": 13860 }, { "epoch": 0.4224308858804161, "grad_norm": 30.5, "learning_rate": 0.0001366534308153515, "loss": 2.7838, "step": 13870 }, { "epoch": 0.4227354503258959, "grad_norm": 30.25, "learning_rate": 0.0001365685813751589, "loss": 2.8144, "step": 13880 }, { "epoch": 0.4230400147713756, "grad_norm": 30.625, "learning_rate": 0.00013648370605802513, "loss": 2.8017, "step": 13890 }, { "epoch": 0.42334457921685537, "grad_norm": 32.25, "learning_rate": 0.00013639880494661612, "loss": 2.8188, "step": 13900 }, { "epoch": 0.42364914366233514, "grad_norm": 31.0, "learning_rate": 0.00013631387812362275, "loss": 2.8, "step": 13910 }, { "epoch": 0.42395370810781485, "grad_norm": 31.375, "learning_rate": 0.00013622892567176101, "loss": 2.7899, "step": 13920 }, { "epoch": 0.4242582725532946, "grad_norm": 30.875, "learning_rate": 0.00013614394767377184, "loss": 2.8276, "step": 13930 }, { "epoch": 0.42456283699877434, "grad_norm": 30.5, "learning_rate": 0.0001360589442124211, "loss": 2.8036, "step": 13940 }, { "epoch": 0.4248674014442541, "grad_norm": 30.875, "learning_rate": 0.00013597391537049938, "loss": 2.8396, "step": 13950 }, { "epoch": 0.4251719658897339, "grad_norm": 31.0, "learning_rate": 0.00013588886123082203, "loss": 2.8005, "step": 13960 }, { "epoch": 0.4254765303352136, "grad_norm": 31.75, "learning_rate": 0.00013580378187622909, "loss": 2.7823, "step": 13970 }, { "epoch": 0.42578109478069337, "grad_norm": 30.25, "learning_rate": 0.00013571867738958505, "loss": 2.8125, "step": 13980 }, { "epoch": 0.42608565922617314, "grad_norm": 31.0, "learning_rate": 0.00013563354785377897, "loss": 2.8276, "step": 13990 }, { "epoch": 0.42639022367165286, "grad_norm": 31.5, "learning_rate": 0.00013554839335172425, "loss": 2.8288, "step": 14000 }, { "epoch": 0.42669478811713263, "grad_norm": 31.75, "learning_rate": 0.00013546321396635863, "loss": 2.8185, "step": 14010 }, { "epoch": 0.4269993525626124, "grad_norm": 30.625, "learning_rate": 0.0001353780097806441, "loss": 2.8371, "step": 14020 }, { "epoch": 0.4273039170080921, "grad_norm": 30.625, "learning_rate": 0.00013529278087756676, "loss": 2.7841, "step": 14030 }, { "epoch": 0.4276084814535719, "grad_norm": 32.25, "learning_rate": 0.00013520752734013684, "loss": 2.7973, "step": 14040 }, { "epoch": 0.42791304589905166, "grad_norm": 30.375, "learning_rate": 0.00013512224925138852, "loss": 2.802, "step": 14050 }, { "epoch": 0.4282176103445314, "grad_norm": 29.875, "learning_rate": 0.00013503694669437992, "loss": 2.8058, "step": 14060 }, { "epoch": 0.42852217479001115, "grad_norm": 35.75, "learning_rate": 0.00013495161975219297, "loss": 2.8341, "step": 14070 }, { "epoch": 0.4288267392354909, "grad_norm": 31.375, "learning_rate": 0.00013486626850793334, "loss": 2.8232, "step": 14080 }, { "epoch": 0.42913130368097063, "grad_norm": 31.625, "learning_rate": 0.00013478089304473043, "loss": 2.7968, "step": 14090 }, { "epoch": 0.4294358681264504, "grad_norm": 32.75, "learning_rate": 0.00013469549344573715, "loss": 2.8054, "step": 14100 }, { "epoch": 0.4297404325719302, "grad_norm": 31.0, "learning_rate": 0.00013461006979412996, "loss": 2.8203, "step": 14110 }, { "epoch": 0.4300449970174099, "grad_norm": 32.0, "learning_rate": 0.00013452462217310876, "loss": 2.8258, "step": 14120 }, { "epoch": 0.43034956146288966, "grad_norm": 31.75, "learning_rate": 0.00013443915066589676, "loss": 2.8272, "step": 14130 }, { "epoch": 0.43065412590836943, "grad_norm": 31.875, "learning_rate": 0.00013435365535574047, "loss": 2.8081, "step": 14140 }, { "epoch": 0.43095869035384915, "grad_norm": 30.5, "learning_rate": 0.00013426813632590954, "loss": 2.8094, "step": 14150 }, { "epoch": 0.4312632547993289, "grad_norm": 31.0, "learning_rate": 0.00013418259365969676, "loss": 2.8078, "step": 14160 }, { "epoch": 0.43156781924480864, "grad_norm": 30.375, "learning_rate": 0.00013409702744041793, "loss": 2.8057, "step": 14170 }, { "epoch": 0.4318723836902884, "grad_norm": 32.0, "learning_rate": 0.00013401143775141177, "loss": 2.8123, "step": 14180 }, { "epoch": 0.4321769481357682, "grad_norm": 30.5, "learning_rate": 0.00013392582467603988, "loss": 2.8197, "step": 14190 }, { "epoch": 0.4324815125812479, "grad_norm": 31.125, "learning_rate": 0.00013384018829768664, "loss": 2.8133, "step": 14200 }, { "epoch": 0.43278607702672767, "grad_norm": 30.75, "learning_rate": 0.0001337545286997591, "loss": 2.821, "step": 14210 }, { "epoch": 0.43309064147220744, "grad_norm": 29.75, "learning_rate": 0.00013366884596568697, "loss": 2.7839, "step": 14220 }, { "epoch": 0.43339520591768715, "grad_norm": 31.875, "learning_rate": 0.0001335831401789225, "loss": 2.7875, "step": 14230 }, { "epoch": 0.4336997703631669, "grad_norm": 31.625, "learning_rate": 0.0001334974114229403, "loss": 2.7806, "step": 14240 }, { "epoch": 0.4340043348086467, "grad_norm": 33.75, "learning_rate": 0.00013341165978123743, "loss": 2.8286, "step": 14250 }, { "epoch": 0.4343088992541264, "grad_norm": 33.0, "learning_rate": 0.0001333258853373332, "loss": 2.8176, "step": 14260 }, { "epoch": 0.4346134636996062, "grad_norm": 30.625, "learning_rate": 0.00013324008817476926, "loss": 2.8085, "step": 14270 }, { "epoch": 0.43491802814508596, "grad_norm": 30.75, "learning_rate": 0.00013315426837710916, "loss": 2.8337, "step": 14280 }, { "epoch": 0.43522259259056567, "grad_norm": 31.0, "learning_rate": 0.00013306842602793862, "loss": 2.8443, "step": 14290 }, { "epoch": 0.43552715703604544, "grad_norm": 30.75, "learning_rate": 0.00013298256121086537, "loss": 2.8119, "step": 14300 }, { "epoch": 0.4358317214815252, "grad_norm": 32.0, "learning_rate": 0.00013289667400951896, "loss": 2.7871, "step": 14310 }, { "epoch": 0.43613628592700493, "grad_norm": 30.0, "learning_rate": 0.00013281076450755074, "loss": 2.7719, "step": 14320 }, { "epoch": 0.4364408503724847, "grad_norm": 30.5, "learning_rate": 0.00013272483278863381, "loss": 2.8023, "step": 14330 }, { "epoch": 0.4367454148179645, "grad_norm": 32.0, "learning_rate": 0.00013263887893646286, "loss": 2.7862, "step": 14340 }, { "epoch": 0.4370499792634442, "grad_norm": 32.5, "learning_rate": 0.00013255290303475426, "loss": 2.8318, "step": 14350 }, { "epoch": 0.43735454370892396, "grad_norm": 32.0, "learning_rate": 0.00013246690516724563, "loss": 2.7674, "step": 14360 }, { "epoch": 0.43765910815440373, "grad_norm": 31.375, "learning_rate": 0.0001323808854176962, "loss": 2.7997, "step": 14370 }, { "epoch": 0.43796367259988345, "grad_norm": 32.5, "learning_rate": 0.0001322948438698864, "loss": 2.8337, "step": 14380 }, { "epoch": 0.4382682370453632, "grad_norm": 32.0, "learning_rate": 0.00013220878060761794, "loss": 2.7831, "step": 14390 }, { "epoch": 0.43857280149084293, "grad_norm": 32.5, "learning_rate": 0.00013212269571471368, "loss": 2.8224, "step": 14400 }, { "epoch": 0.4388773659363227, "grad_norm": 35.5, "learning_rate": 0.00013203658927501746, "loss": 2.8299, "step": 14410 }, { "epoch": 0.4391819303818025, "grad_norm": 30.625, "learning_rate": 0.00013195046137239426, "loss": 2.8476, "step": 14420 }, { "epoch": 0.4394864948272822, "grad_norm": 31.0, "learning_rate": 0.00013186431209072982, "loss": 2.8057, "step": 14430 }, { "epoch": 0.43979105927276196, "grad_norm": 31.0, "learning_rate": 0.00013177814151393077, "loss": 2.7615, "step": 14440 }, { "epoch": 0.44009562371824174, "grad_norm": 31.625, "learning_rate": 0.00013169194972592453, "loss": 2.8023, "step": 14450 }, { "epoch": 0.44040018816372145, "grad_norm": 31.625, "learning_rate": 0.00013160573681065903, "loss": 2.7442, "step": 14460 }, { "epoch": 0.4407047526092012, "grad_norm": 31.0, "learning_rate": 0.00013151950285210292, "loss": 2.8276, "step": 14470 }, { "epoch": 0.441009317054681, "grad_norm": 30.75, "learning_rate": 0.00013143324793424526, "loss": 2.8215, "step": 14480 }, { "epoch": 0.4413138815001607, "grad_norm": 32.25, "learning_rate": 0.0001313469721410956, "loss": 2.7726, "step": 14490 }, { "epoch": 0.4416184459456405, "grad_norm": 30.875, "learning_rate": 0.0001312606755566838, "loss": 2.8, "step": 14500 }, { "epoch": 0.44192301039112025, "grad_norm": 33.0, "learning_rate": 0.00013117435826505989, "loss": 2.8264, "step": 14510 }, { "epoch": 0.44222757483659997, "grad_norm": 32.25, "learning_rate": 0.00013108802035029414, "loss": 2.8018, "step": 14520 }, { "epoch": 0.44253213928207974, "grad_norm": 31.125, "learning_rate": 0.00013100166189647687, "loss": 2.7792, "step": 14530 }, { "epoch": 0.4428367037275595, "grad_norm": 31.375, "learning_rate": 0.00013091528298771847, "loss": 2.783, "step": 14540 }, { "epoch": 0.4431412681730392, "grad_norm": 31.875, "learning_rate": 0.00013082888370814923, "loss": 2.7967, "step": 14550 }, { "epoch": 0.443445832618519, "grad_norm": 31.0, "learning_rate": 0.00013074246414191922, "loss": 2.8093, "step": 14560 }, { "epoch": 0.44375039706399877, "grad_norm": 33.25, "learning_rate": 0.0001306560243731983, "loss": 2.8082, "step": 14570 }, { "epoch": 0.4440549615094785, "grad_norm": 33.5, "learning_rate": 0.00013056956448617607, "loss": 2.8137, "step": 14580 }, { "epoch": 0.44435952595495826, "grad_norm": 30.625, "learning_rate": 0.00013048308456506158, "loss": 2.8088, "step": 14590 }, { "epoch": 0.44466409040043803, "grad_norm": 31.125, "learning_rate": 0.00013039658469408356, "loss": 2.7747, "step": 14600 }, { "epoch": 0.44496865484591774, "grad_norm": 30.875, "learning_rate": 0.0001303100649574901, "loss": 2.8165, "step": 14610 }, { "epoch": 0.4452732192913975, "grad_norm": 31.25, "learning_rate": 0.0001302235254395486, "loss": 2.8012, "step": 14620 }, { "epoch": 0.44557778373687723, "grad_norm": 30.875, "learning_rate": 0.00013013696622454573, "loss": 2.8011, "step": 14630 }, { "epoch": 0.445882348182357, "grad_norm": 30.75, "learning_rate": 0.0001300503873967874, "loss": 2.8049, "step": 14640 }, { "epoch": 0.4461869126278368, "grad_norm": 30.375, "learning_rate": 0.00012996378904059865, "loss": 2.8349, "step": 14650 }, { "epoch": 0.4464914770733165, "grad_norm": 31.375, "learning_rate": 0.0001298771712403234, "loss": 2.8139, "step": 14660 }, { "epoch": 0.44679604151879626, "grad_norm": 32.25, "learning_rate": 0.00012979053408032463, "loss": 2.7789, "step": 14670 }, { "epoch": 0.44710060596427603, "grad_norm": 32.25, "learning_rate": 0.00012970387764498417, "loss": 2.788, "step": 14680 }, { "epoch": 0.44740517040975575, "grad_norm": 33.0, "learning_rate": 0.00012961720201870255, "loss": 2.8089, "step": 14690 }, { "epoch": 0.4477097348552355, "grad_norm": 32.25, "learning_rate": 0.00012953050728589904, "loss": 2.8008, "step": 14700 }, { "epoch": 0.4480142993007153, "grad_norm": 32.25, "learning_rate": 0.0001294437935310115, "loss": 2.836, "step": 14710 }, { "epoch": 0.448318863746195, "grad_norm": 33.0, "learning_rate": 0.00012935706083849638, "loss": 2.8031, "step": 14720 }, { "epoch": 0.4486234281916748, "grad_norm": 29.75, "learning_rate": 0.00012927030929282845, "loss": 2.7923, "step": 14730 }, { "epoch": 0.44892799263715455, "grad_norm": 31.875, "learning_rate": 0.00012918353897850102, "loss": 2.7839, "step": 14740 }, { "epoch": 0.44923255708263427, "grad_norm": 32.25, "learning_rate": 0.00012909674998002547, "loss": 2.8289, "step": 14750 }, { "epoch": 0.44953712152811404, "grad_norm": 31.25, "learning_rate": 0.00012900994238193155, "loss": 2.8006, "step": 14760 }, { "epoch": 0.4498416859735938, "grad_norm": 31.875, "learning_rate": 0.00012892311626876702, "loss": 2.794, "step": 14770 }, { "epoch": 0.4501462504190735, "grad_norm": 30.625, "learning_rate": 0.00012883627172509773, "loss": 2.8307, "step": 14780 }, { "epoch": 0.4504508148645533, "grad_norm": 32.5, "learning_rate": 0.00012874940883550746, "loss": 2.8105, "step": 14790 }, { "epoch": 0.45075537931003307, "grad_norm": 31.625, "learning_rate": 0.0001286625276845979, "loss": 2.7679, "step": 14800 }, { "epoch": 0.4510599437555128, "grad_norm": 31.0, "learning_rate": 0.00012857562835698845, "loss": 2.7514, "step": 14810 }, { "epoch": 0.45136450820099255, "grad_norm": 31.5, "learning_rate": 0.0001284887109373162, "loss": 2.7755, "step": 14820 }, { "epoch": 0.4516690726464723, "grad_norm": 30.25, "learning_rate": 0.00012840177551023604, "loss": 2.7896, "step": 14830 }, { "epoch": 0.45197363709195204, "grad_norm": 32.75, "learning_rate": 0.0001283148221604202, "loss": 2.7805, "step": 14840 }, { "epoch": 0.4522782015374318, "grad_norm": 30.875, "learning_rate": 0.00012822785097255841, "loss": 2.7967, "step": 14850 }, { "epoch": 0.45258276598291153, "grad_norm": 31.0, "learning_rate": 0.00012814086203135784, "loss": 2.7593, "step": 14860 }, { "epoch": 0.4528873304283913, "grad_norm": 31.625, "learning_rate": 0.00012805385542154291, "loss": 2.8279, "step": 14870 }, { "epoch": 0.45319189487387107, "grad_norm": 33.0, "learning_rate": 0.0001279668312278552, "loss": 2.8132, "step": 14880 }, { "epoch": 0.4534964593193508, "grad_norm": 31.75, "learning_rate": 0.00012787978953505356, "loss": 2.7928, "step": 14890 }, { "epoch": 0.45380102376483056, "grad_norm": 32.75, "learning_rate": 0.00012779273042791376, "loss": 2.834, "step": 14900 }, { "epoch": 0.45410558821031033, "grad_norm": 31.375, "learning_rate": 0.0001277056539912285, "loss": 2.8261, "step": 14910 }, { "epoch": 0.45441015265579005, "grad_norm": 33.25, "learning_rate": 0.00012761856030980747, "loss": 2.8209, "step": 14920 }, { "epoch": 0.4547147171012698, "grad_norm": 30.5, "learning_rate": 0.00012753144946847715, "loss": 2.803, "step": 14930 }, { "epoch": 0.4550192815467496, "grad_norm": 32.0, "learning_rate": 0.00012744432155208058, "loss": 2.8047, "step": 14940 }, { "epoch": 0.4553238459922293, "grad_norm": 32.0, "learning_rate": 0.00012735717664547763, "loss": 2.7975, "step": 14950 }, { "epoch": 0.4556284104377091, "grad_norm": 33.5, "learning_rate": 0.0001272700148335446, "loss": 2.7669, "step": 14960 }, { "epoch": 0.45593297488318885, "grad_norm": 33.0, "learning_rate": 0.00012718283620117424, "loss": 2.7982, "step": 14970 }, { "epoch": 0.45623753932866856, "grad_norm": 32.5, "learning_rate": 0.0001270956408332758, "loss": 2.8085, "step": 14980 }, { "epoch": 0.45654210377414833, "grad_norm": 31.125, "learning_rate": 0.0001270084288147747, "loss": 2.7978, "step": 14990 }, { "epoch": 0.4568466682196281, "grad_norm": 30.75, "learning_rate": 0.00012692120023061263, "loss": 2.7954, "step": 15000 }, { "epoch": 0.4571512326651078, "grad_norm": 31.375, "learning_rate": 0.0001268339551657475, "loss": 2.807, "step": 15010 }, { "epoch": 0.4574557971105876, "grad_norm": 31.75, "learning_rate": 0.0001267466937051531, "loss": 2.8079, "step": 15020 }, { "epoch": 0.45776036155606736, "grad_norm": 30.75, "learning_rate": 0.00012665941593381928, "loss": 2.7738, "step": 15030 }, { "epoch": 0.4580649260015471, "grad_norm": 31.25, "learning_rate": 0.00012657212193675183, "loss": 2.7485, "step": 15040 }, { "epoch": 0.45836949044702685, "grad_norm": 31.625, "learning_rate": 0.00012648481179897226, "loss": 2.7834, "step": 15050 }, { "epoch": 0.4586740548925066, "grad_norm": 30.875, "learning_rate": 0.00012639748560551778, "loss": 2.7917, "step": 15060 }, { "epoch": 0.45897861933798634, "grad_norm": 32.5, "learning_rate": 0.0001263101434414414, "loss": 2.7895, "step": 15070 }, { "epoch": 0.4592831837834661, "grad_norm": 32.5, "learning_rate": 0.00012622278539181143, "loss": 2.8033, "step": 15080 }, { "epoch": 0.4595877482289458, "grad_norm": 30.0, "learning_rate": 0.00012613541154171188, "loss": 2.8197, "step": 15090 }, { "epoch": 0.4598923126744256, "grad_norm": 32.0, "learning_rate": 0.00012604802197624203, "loss": 2.8204, "step": 15100 }, { "epoch": 0.46019687711990537, "grad_norm": 33.25, "learning_rate": 0.00012596061678051652, "loss": 2.8062, "step": 15110 }, { "epoch": 0.4605014415653851, "grad_norm": 31.375, "learning_rate": 0.0001258731960396652, "loss": 2.8162, "step": 15120 }, { "epoch": 0.46080600601086485, "grad_norm": 32.0, "learning_rate": 0.000125785759838833, "loss": 2.8032, "step": 15130 }, { "epoch": 0.4611105704563446, "grad_norm": 30.375, "learning_rate": 0.00012569830826318003, "loss": 2.7954, "step": 15140 }, { "epoch": 0.46141513490182434, "grad_norm": 30.875, "learning_rate": 0.00012561084139788123, "loss": 2.7723, "step": 15150 }, { "epoch": 0.4617196993473041, "grad_norm": 31.125, "learning_rate": 0.00012552335932812652, "loss": 2.8134, "step": 15160 }, { "epoch": 0.4620242637927839, "grad_norm": 31.5, "learning_rate": 0.0001254358621391207, "loss": 2.78, "step": 15170 }, { "epoch": 0.4623288282382636, "grad_norm": 31.375, "learning_rate": 0.0001253483499160831, "loss": 2.7957, "step": 15180 }, { "epoch": 0.46263339268374337, "grad_norm": 31.625, "learning_rate": 0.0001252608227442478, "loss": 2.7857, "step": 15190 }, { "epoch": 0.46293795712922314, "grad_norm": 30.625, "learning_rate": 0.0001251732807088635, "loss": 2.7688, "step": 15200 }, { "epoch": 0.46324252157470286, "grad_norm": 31.75, "learning_rate": 0.00012508572389519334, "loss": 2.829, "step": 15210 }, { "epoch": 0.46354708602018263, "grad_norm": 32.25, "learning_rate": 0.0001249981523885148, "loss": 2.7787, "step": 15220 }, { "epoch": 0.4638516504656624, "grad_norm": 31.875, "learning_rate": 0.00012491056627411969, "loss": 2.8202, "step": 15230 }, { "epoch": 0.4641562149111421, "grad_norm": 31.0, "learning_rate": 0.00012482296563731403, "loss": 2.7918, "step": 15240 }, { "epoch": 0.4644607793566219, "grad_norm": 31.875, "learning_rate": 0.00012473535056341806, "loss": 2.7987, "step": 15250 }, { "epoch": 0.46476534380210166, "grad_norm": 32.25, "learning_rate": 0.00012464772113776607, "loss": 2.8015, "step": 15260 }, { "epoch": 0.4650699082475814, "grad_norm": 30.625, "learning_rate": 0.00012456007744570622, "loss": 2.7842, "step": 15270 }, { "epoch": 0.46537447269306115, "grad_norm": 30.75, "learning_rate": 0.00012447241957260066, "loss": 2.774, "step": 15280 }, { "epoch": 0.4656790371385409, "grad_norm": 32.0, "learning_rate": 0.00012438474760382534, "loss": 2.7946, "step": 15290 }, { "epoch": 0.46598360158402063, "grad_norm": 31.125, "learning_rate": 0.00012429706162476992, "loss": 2.7986, "step": 15300 }, { "epoch": 0.4662881660295004, "grad_norm": 31.0, "learning_rate": 0.00012420936172083776, "loss": 2.8123, "step": 15310 }, { "epoch": 0.4665927304749802, "grad_norm": 32.0, "learning_rate": 0.00012412164797744567, "loss": 2.8025, "step": 15320 }, { "epoch": 0.4668972949204599, "grad_norm": 30.625, "learning_rate": 0.00012403392048002403, "loss": 2.7597, "step": 15330 }, { "epoch": 0.46720185936593966, "grad_norm": 30.625, "learning_rate": 0.00012394617931401665, "loss": 2.8179, "step": 15340 }, { "epoch": 0.4675064238114194, "grad_norm": 31.0, "learning_rate": 0.00012385842456488048, "loss": 2.7443, "step": 15350 }, { "epoch": 0.46781098825689915, "grad_norm": 31.875, "learning_rate": 0.00012377065631808591, "loss": 2.8299, "step": 15360 }, { "epoch": 0.4681155527023789, "grad_norm": 33.5, "learning_rate": 0.00012368287465911633, "loss": 2.8045, "step": 15370 }, { "epoch": 0.46842011714785864, "grad_norm": 31.875, "learning_rate": 0.0001235950796734683, "loss": 2.8018, "step": 15380 }, { "epoch": 0.4687246815933384, "grad_norm": 31.25, "learning_rate": 0.0001235072714466512, "loss": 2.7904, "step": 15390 }, { "epoch": 0.4690292460388182, "grad_norm": 31.125, "learning_rate": 0.00012341945006418752, "loss": 2.8032, "step": 15400 }, { "epoch": 0.4693338104842979, "grad_norm": 32.75, "learning_rate": 0.00012333161561161243, "loss": 2.7807, "step": 15410 }, { "epoch": 0.46963837492977767, "grad_norm": 30.5, "learning_rate": 0.00012324376817447378, "loss": 2.7534, "step": 15420 }, { "epoch": 0.46994293937525744, "grad_norm": 32.0, "learning_rate": 0.0001231559078383322, "loss": 2.7915, "step": 15430 }, { "epoch": 0.47024750382073716, "grad_norm": 31.625, "learning_rate": 0.00012306803468876084, "loss": 2.8077, "step": 15440 }, { "epoch": 0.4705520682662169, "grad_norm": 31.0, "learning_rate": 0.0001229801488113453, "loss": 2.7938, "step": 15450 }, { "epoch": 0.4708566327116967, "grad_norm": 31.5, "learning_rate": 0.00012289225029168355, "loss": 2.7568, "step": 15460 }, { "epoch": 0.4711611971571764, "grad_norm": 31.125, "learning_rate": 0.00012280433921538594, "loss": 2.8259, "step": 15470 }, { "epoch": 0.4714657616026562, "grad_norm": 31.5, "learning_rate": 0.000122716415668075, "loss": 2.7964, "step": 15480 }, { "epoch": 0.47177032604813596, "grad_norm": 31.875, "learning_rate": 0.00012262847973538548, "loss": 2.8215, "step": 15490 }, { "epoch": 0.4720748904936157, "grad_norm": 32.25, "learning_rate": 0.00012254053150296415, "loss": 2.8098, "step": 15500 }, { "epoch": 0.47237945493909544, "grad_norm": 31.0, "learning_rate": 0.00012245257105646966, "loss": 2.7529, "step": 15510 }, { "epoch": 0.4726840193845752, "grad_norm": 30.75, "learning_rate": 0.0001223645984815727, "loss": 2.7885, "step": 15520 }, { "epoch": 0.47298858383005493, "grad_norm": 32.0, "learning_rate": 0.00012227661386395575, "loss": 2.8171, "step": 15530 }, { "epoch": 0.4732931482755347, "grad_norm": 33.0, "learning_rate": 0.00012218861728931294, "loss": 2.8041, "step": 15540 }, { "epoch": 0.4735977127210145, "grad_norm": 31.625, "learning_rate": 0.00012210060884335013, "loss": 2.7693, "step": 15550 }, { "epoch": 0.4739022771664942, "grad_norm": 32.25, "learning_rate": 0.00012201258861178469, "loss": 2.7715, "step": 15560 }, { "epoch": 0.47420684161197396, "grad_norm": 31.75, "learning_rate": 0.00012192455668034544, "loss": 2.7987, "step": 15570 }, { "epoch": 0.4745114060574537, "grad_norm": 31.875, "learning_rate": 0.00012183651313477271, "loss": 2.8001, "step": 15580 }, { "epoch": 0.47481597050293345, "grad_norm": 32.0, "learning_rate": 0.00012174845806081808, "loss": 2.7757, "step": 15590 }, { "epoch": 0.4751205349484132, "grad_norm": 32.25, "learning_rate": 0.00012166039154424429, "loss": 2.7886, "step": 15600 }, { "epoch": 0.47542509939389294, "grad_norm": 31.0, "learning_rate": 0.00012157231367082534, "loss": 2.7854, "step": 15610 }, { "epoch": 0.4757296638393727, "grad_norm": 30.875, "learning_rate": 0.0001214842245263462, "loss": 2.7886, "step": 15620 }, { "epoch": 0.4760342282848525, "grad_norm": 32.75, "learning_rate": 0.00012139612419660287, "loss": 2.8173, "step": 15630 }, { "epoch": 0.4763387927303322, "grad_norm": 34.0, "learning_rate": 0.00012130801276740225, "loss": 2.7951, "step": 15640 }, { "epoch": 0.47664335717581197, "grad_norm": 30.625, "learning_rate": 0.00012121989032456198, "loss": 2.7943, "step": 15650 }, { "epoch": 0.47694792162129174, "grad_norm": 30.75, "learning_rate": 0.0001211317569539105, "loss": 2.7908, "step": 15660 }, { "epoch": 0.47725248606677145, "grad_norm": 30.875, "learning_rate": 0.00012104361274128691, "loss": 2.7576, "step": 15670 }, { "epoch": 0.4775570505122512, "grad_norm": 33.0, "learning_rate": 0.00012095545777254074, "loss": 2.7676, "step": 15680 }, { "epoch": 0.477861614957731, "grad_norm": 30.5, "learning_rate": 0.0001208672921335322, "loss": 2.7852, "step": 15690 }, { "epoch": 0.4781661794032107, "grad_norm": 31.0, "learning_rate": 0.00012077911591013169, "loss": 2.8063, "step": 15700 }, { "epoch": 0.4784707438486905, "grad_norm": 32.25, "learning_rate": 0.00012069092918822003, "loss": 2.8058, "step": 15710 }, { "epoch": 0.47877530829417025, "grad_norm": 31.625, "learning_rate": 0.00012060273205368821, "loss": 2.823, "step": 15720 }, { "epoch": 0.47907987273964997, "grad_norm": 31.125, "learning_rate": 0.0001205145245924375, "loss": 2.7948, "step": 15730 }, { "epoch": 0.47938443718512974, "grad_norm": 33.5, "learning_rate": 0.00012042630689037906, "loss": 2.7628, "step": 15740 }, { "epoch": 0.4796890016306095, "grad_norm": 31.375, "learning_rate": 0.00012033807903343401, "loss": 2.7827, "step": 15750 }, { "epoch": 0.47999356607608923, "grad_norm": 32.25, "learning_rate": 0.00012024984110753355, "loss": 2.8289, "step": 15760 }, { "epoch": 0.480298130521569, "grad_norm": 32.5, "learning_rate": 0.00012016159319861848, "loss": 2.7731, "step": 15770 }, { "epoch": 0.48060269496704877, "grad_norm": 32.0, "learning_rate": 0.00012007333539263952, "loss": 2.8166, "step": 15780 }, { "epoch": 0.4809072594125285, "grad_norm": 32.5, "learning_rate": 0.00011998506777555685, "loss": 2.7951, "step": 15790 }, { "epoch": 0.48121182385800826, "grad_norm": 30.75, "learning_rate": 0.00011989679043334027, "loss": 2.7956, "step": 15800 }, { "epoch": 0.481516388303488, "grad_norm": 31.0, "learning_rate": 0.0001198085034519691, "loss": 2.7739, "step": 15810 }, { "epoch": 0.48182095274896775, "grad_norm": 33.0, "learning_rate": 0.00011972020691743203, "loss": 2.8309, "step": 15820 }, { "epoch": 0.4821255171944475, "grad_norm": 31.125, "learning_rate": 0.00011963190091572702, "loss": 2.7985, "step": 15830 }, { "epoch": 0.48243008163992723, "grad_norm": 31.5, "learning_rate": 0.00011954358553286123, "loss": 2.7925, "step": 15840 }, { "epoch": 0.482734646085407, "grad_norm": 30.25, "learning_rate": 0.00011945526085485101, "loss": 2.7723, "step": 15850 }, { "epoch": 0.4830392105308868, "grad_norm": 32.0, "learning_rate": 0.00011936692696772179, "loss": 2.758, "step": 15860 }, { "epoch": 0.4833437749763665, "grad_norm": 31.25, "learning_rate": 0.00011927858395750786, "loss": 2.7523, "step": 15870 }, { "epoch": 0.48364833942184626, "grad_norm": 31.125, "learning_rate": 0.00011919023191025253, "loss": 2.7776, "step": 15880 }, { "epoch": 0.48395290386732603, "grad_norm": 30.875, "learning_rate": 0.00011910187091200782, "loss": 2.7963, "step": 15890 }, { "epoch": 0.48425746831280575, "grad_norm": 31.375, "learning_rate": 0.00011901350104883444, "loss": 2.8062, "step": 15900 }, { "epoch": 0.4845620327582855, "grad_norm": 31.375, "learning_rate": 0.00011892512240680188, "loss": 2.7911, "step": 15910 }, { "epoch": 0.4848665972037653, "grad_norm": 31.875, "learning_rate": 0.000118836735071988, "loss": 2.8001, "step": 15920 }, { "epoch": 0.485171161649245, "grad_norm": 32.5, "learning_rate": 0.00011874833913047927, "loss": 2.8044, "step": 15930 }, { "epoch": 0.4854757260947248, "grad_norm": 32.25, "learning_rate": 0.00011865993466837041, "loss": 2.7881, "step": 15940 }, { "epoch": 0.48578029054020455, "grad_norm": 31.375, "learning_rate": 0.0001185715217717646, "loss": 2.8193, "step": 15950 }, { "epoch": 0.48608485498568427, "grad_norm": 32.75, "learning_rate": 0.00011848310052677311, "loss": 2.787, "step": 15960 }, { "epoch": 0.48638941943116404, "grad_norm": 32.75, "learning_rate": 0.0001183946710195154, "loss": 2.822, "step": 15970 }, { "epoch": 0.4866939838766438, "grad_norm": 32.0, "learning_rate": 0.00011830623333611894, "loss": 2.7896, "step": 15980 }, { "epoch": 0.4869985483221235, "grad_norm": 32.25, "learning_rate": 0.00011821778756271915, "loss": 2.7726, "step": 15990 }, { "epoch": 0.4873031127676033, "grad_norm": 33.5, "learning_rate": 0.00011812933378545943, "loss": 2.7919, "step": 16000 }, { "epoch": 0.48760767721308307, "grad_norm": 31.375, "learning_rate": 0.00011804087209049082, "loss": 2.7732, "step": 16010 }, { "epoch": 0.4879122416585628, "grad_norm": 30.625, "learning_rate": 0.00011795240256397227, "loss": 2.7882, "step": 16020 }, { "epoch": 0.48821680610404256, "grad_norm": 31.5, "learning_rate": 0.00011786392529207013, "loss": 2.7661, "step": 16030 }, { "epoch": 0.48852137054952227, "grad_norm": 31.375, "learning_rate": 0.00011777544036095846, "loss": 2.7872, "step": 16040 }, { "epoch": 0.48882593499500204, "grad_norm": 31.375, "learning_rate": 0.00011768694785681873, "loss": 2.7779, "step": 16050 }, { "epoch": 0.4891304994404818, "grad_norm": 31.125, "learning_rate": 0.0001175984478658398, "loss": 2.7775, "step": 16060 }, { "epoch": 0.48943506388596153, "grad_norm": 31.0, "learning_rate": 0.00011750994047421777, "loss": 2.7805, "step": 16070 }, { "epoch": 0.4897396283314413, "grad_norm": 31.5, "learning_rate": 0.000117421425768156, "loss": 2.7826, "step": 16080 }, { "epoch": 0.4900441927769211, "grad_norm": 32.5, "learning_rate": 0.00011733290383386495, "loss": 2.8174, "step": 16090 }, { "epoch": 0.4903487572224008, "grad_norm": 31.25, "learning_rate": 0.00011724437475756213, "loss": 2.7479, "step": 16100 }, { "epoch": 0.49065332166788056, "grad_norm": 30.875, "learning_rate": 0.00011715583862547206, "loss": 2.7909, "step": 16110 }, { "epoch": 0.49095788611336033, "grad_norm": 31.625, "learning_rate": 0.000117067295523826, "loss": 2.7789, "step": 16120 }, { "epoch": 0.49126245055884005, "grad_norm": 31.625, "learning_rate": 0.0001169787455388621, "loss": 2.7954, "step": 16130 }, { "epoch": 0.4915670150043198, "grad_norm": 30.625, "learning_rate": 0.00011689018875682521, "loss": 2.8039, "step": 16140 }, { "epoch": 0.4918715794497996, "grad_norm": 30.5, "learning_rate": 0.00011680162526396672, "loss": 2.7757, "step": 16150 }, { "epoch": 0.4921761438952793, "grad_norm": 30.625, "learning_rate": 0.00011671305514654467, "loss": 2.7656, "step": 16160 }, { "epoch": 0.4924807083407591, "grad_norm": 31.25, "learning_rate": 0.00011662447849082347, "loss": 2.824, "step": 16170 }, { "epoch": 0.49278527278623885, "grad_norm": 32.5, "learning_rate": 0.00011653589538307397, "loss": 2.7783, "step": 16180 }, { "epoch": 0.49308983723171856, "grad_norm": 32.0, "learning_rate": 0.0001164473059095732, "loss": 2.7675, "step": 16190 }, { "epoch": 0.49339440167719834, "grad_norm": 31.625, "learning_rate": 0.00011635871015660446, "loss": 2.7578, "step": 16200 }, { "epoch": 0.4936989661226781, "grad_norm": 33.0, "learning_rate": 0.00011627010821045718, "loss": 2.7556, "step": 16210 }, { "epoch": 0.4940035305681578, "grad_norm": 30.375, "learning_rate": 0.00011618150015742679, "loss": 2.7926, "step": 16220 }, { "epoch": 0.4943080950136376, "grad_norm": 29.5, "learning_rate": 0.00011609288608381468, "loss": 2.7511, "step": 16230 }, { "epoch": 0.49461265945911737, "grad_norm": 32.25, "learning_rate": 0.00011600426607592806, "loss": 2.7783, "step": 16240 }, { "epoch": 0.4949172239045971, "grad_norm": 31.875, "learning_rate": 0.00011591564022008004, "loss": 2.8004, "step": 16250 }, { "epoch": 0.49522178835007685, "grad_norm": 33.25, "learning_rate": 0.00011582700860258926, "loss": 2.8038, "step": 16260 }, { "epoch": 0.49552635279555657, "grad_norm": 31.25, "learning_rate": 0.00011573837130978009, "loss": 2.7762, "step": 16270 }, { "epoch": 0.49583091724103634, "grad_norm": 33.0, "learning_rate": 0.00011564972842798238, "loss": 2.7703, "step": 16280 }, { "epoch": 0.4961354816865161, "grad_norm": 30.125, "learning_rate": 0.00011556108004353148, "loss": 2.782, "step": 16290 }, { "epoch": 0.4964400461319958, "grad_norm": 32.5, "learning_rate": 0.00011547242624276802, "loss": 2.7743, "step": 16300 }, { "epoch": 0.4967446105774756, "grad_norm": 31.125, "learning_rate": 0.00011538376711203793, "loss": 2.7572, "step": 16310 }, { "epoch": 0.49704917502295537, "grad_norm": 32.0, "learning_rate": 0.00011529510273769235, "loss": 2.7587, "step": 16320 }, { "epoch": 0.4973537394684351, "grad_norm": 31.875, "learning_rate": 0.00011520643320608759, "loss": 2.7712, "step": 16330 }, { "epoch": 0.49765830391391486, "grad_norm": 32.25, "learning_rate": 0.0001151177586035848, "loss": 2.7726, "step": 16340 }, { "epoch": 0.49796286835939463, "grad_norm": 32.75, "learning_rate": 0.00011502907901655029, "loss": 2.7514, "step": 16350 }, { "epoch": 0.49826743280487434, "grad_norm": 31.375, "learning_rate": 0.00011494039453135501, "loss": 2.7619, "step": 16360 }, { "epoch": 0.4985719972503541, "grad_norm": 32.5, "learning_rate": 0.00011485170523437485, "loss": 2.7358, "step": 16370 }, { "epoch": 0.4988765616958339, "grad_norm": 31.875, "learning_rate": 0.00011476301121199033, "loss": 2.7533, "step": 16380 }, { "epoch": 0.4991811261413136, "grad_norm": 34.5, "learning_rate": 0.00011467431255058653, "loss": 2.8034, "step": 16390 }, { "epoch": 0.4994856905867934, "grad_norm": 31.5, "learning_rate": 0.00011458560933655314, "loss": 2.7976, "step": 16400 }, { "epoch": 0.49979025503227315, "grad_norm": 31.25, "learning_rate": 0.00011449690165628418, "loss": 2.7922, "step": 16410 }, { "epoch": 0.5000948194777529, "grad_norm": 31.375, "learning_rate": 0.00011440818959617808, "loss": 2.7847, "step": 16420 }, { "epoch": 0.5003993839232326, "grad_norm": 33.5, "learning_rate": 0.00011431947324263752, "loss": 2.8146, "step": 16430 }, { "epoch": 0.5007039483687123, "grad_norm": 32.5, "learning_rate": 0.00011423075268206939, "loss": 2.7862, "step": 16440 }, { "epoch": 0.5010085128141921, "grad_norm": 32.5, "learning_rate": 0.00011414202800088463, "loss": 2.8016, "step": 16450 }, { "epoch": 0.5013130772596719, "grad_norm": 33.0, "learning_rate": 0.00011405329928549821, "loss": 2.7705, "step": 16460 }, { "epoch": 0.5016176417051517, "grad_norm": 30.625, "learning_rate": 0.00011396456662232909, "loss": 2.7699, "step": 16470 }, { "epoch": 0.5019222061506314, "grad_norm": 31.125, "learning_rate": 0.00011387583009779996, "loss": 2.7693, "step": 16480 }, { "epoch": 0.5022267705961111, "grad_norm": 32.75, "learning_rate": 0.00011378708979833736, "loss": 2.7888, "step": 16490 }, { "epoch": 0.5025313350415909, "grad_norm": 31.375, "learning_rate": 0.00011369834581037145, "loss": 2.7869, "step": 16500 }, { "epoch": 0.5028358994870706, "grad_norm": 32.0, "learning_rate": 0.00011360959822033605, "loss": 2.7795, "step": 16510 }, { "epoch": 0.5031404639325504, "grad_norm": 31.75, "learning_rate": 0.00011352084711466843, "loss": 2.7463, "step": 16520 }, { "epoch": 0.5034450283780302, "grad_norm": 32.0, "learning_rate": 0.00011343209257980936, "loss": 2.7887, "step": 16530 }, { "epoch": 0.50374959282351, "grad_norm": 32.0, "learning_rate": 0.00011334333470220279, "loss": 2.7806, "step": 16540 }, { "epoch": 0.5040541572689896, "grad_norm": 30.625, "learning_rate": 0.0001132545735682961, "loss": 2.7939, "step": 16550 }, { "epoch": 0.5043587217144694, "grad_norm": 30.875, "learning_rate": 0.00011316580926453974, "loss": 2.8021, "step": 16560 }, { "epoch": 0.5046632861599492, "grad_norm": 32.0, "learning_rate": 0.00011307704187738727, "loss": 2.8023, "step": 16570 }, { "epoch": 0.5049678506054289, "grad_norm": 31.375, "learning_rate": 0.00011298827149329529, "loss": 2.7847, "step": 16580 }, { "epoch": 0.5052724150509087, "grad_norm": 30.25, "learning_rate": 0.00011289949819872334, "loss": 2.8107, "step": 16590 }, { "epoch": 0.5055769794963885, "grad_norm": 32.25, "learning_rate": 0.00011281072208013364, "loss": 2.7719, "step": 16600 }, { "epoch": 0.5058815439418681, "grad_norm": 30.625, "learning_rate": 0.0001127219432239913, "loss": 2.7638, "step": 16610 }, { "epoch": 0.5061861083873479, "grad_norm": 30.625, "learning_rate": 0.0001126331617167641, "loss": 2.7771, "step": 16620 }, { "epoch": 0.5064906728328277, "grad_norm": 31.25, "learning_rate": 0.00011254437764492232, "loss": 2.7978, "step": 16630 }, { "epoch": 0.5067952372783074, "grad_norm": 32.25, "learning_rate": 0.00011245559109493879, "loss": 2.783, "step": 16640 }, { "epoch": 0.5070998017237872, "grad_norm": 31.5, "learning_rate": 0.00011236680215328877, "loss": 2.8007, "step": 16650 }, { "epoch": 0.5074043661692669, "grad_norm": 31.375, "learning_rate": 0.0001122780109064498, "loss": 2.7673, "step": 16660 }, { "epoch": 0.5077089306147466, "grad_norm": 32.0, "learning_rate": 0.00011218921744090172, "loss": 2.7672, "step": 16670 }, { "epoch": 0.5080134950602264, "grad_norm": 31.125, "learning_rate": 0.00011210042184312648, "loss": 2.7472, "step": 16680 }, { "epoch": 0.5083180595057062, "grad_norm": 31.875, "learning_rate": 0.00011201162419960811, "loss": 2.7929, "step": 16690 }, { "epoch": 0.508622623951186, "grad_norm": 31.625, "learning_rate": 0.00011192282459683267, "loss": 2.7638, "step": 16700 }, { "epoch": 0.5089271883966657, "grad_norm": 32.25, "learning_rate": 0.0001118340231212881, "loss": 2.7886, "step": 16710 }, { "epoch": 0.5092317528421454, "grad_norm": 31.625, "learning_rate": 0.0001117452198594642, "loss": 2.7813, "step": 16720 }, { "epoch": 0.5095363172876252, "grad_norm": 31.125, "learning_rate": 0.00011165641489785247, "loss": 2.8151, "step": 16730 }, { "epoch": 0.5098408817331049, "grad_norm": 32.0, "learning_rate": 0.00011156760832294604, "loss": 2.7625, "step": 16740 }, { "epoch": 0.5101454461785847, "grad_norm": 31.75, "learning_rate": 0.00011147880022123966, "loss": 2.8039, "step": 16750 }, { "epoch": 0.5104500106240645, "grad_norm": 31.25, "learning_rate": 0.00011138999067922961, "loss": 2.765, "step": 16760 }, { "epoch": 0.5107545750695442, "grad_norm": 31.375, "learning_rate": 0.00011130117978341345, "loss": 2.8114, "step": 16770 }, { "epoch": 0.5110591395150239, "grad_norm": 32.25, "learning_rate": 0.00011121236762029021, "loss": 2.7726, "step": 16780 }, { "epoch": 0.5113637039605037, "grad_norm": 32.75, "learning_rate": 0.00011112355427635998, "loss": 2.7845, "step": 16790 }, { "epoch": 0.5116682684059835, "grad_norm": 31.625, "learning_rate": 0.00011103473983812414, "loss": 2.7916, "step": 16800 }, { "epoch": 0.5119728328514632, "grad_norm": 31.375, "learning_rate": 0.00011094592439208507, "loss": 2.7725, "step": 16810 }, { "epoch": 0.512277397296943, "grad_norm": 31.875, "learning_rate": 0.00011085710802474618, "loss": 2.7804, "step": 16820 }, { "epoch": 0.5125819617424228, "grad_norm": 31.5, "learning_rate": 0.00011076829082261171, "loss": 2.764, "step": 16830 }, { "epoch": 0.5128865261879024, "grad_norm": 32.5, "learning_rate": 0.00011067947287218676, "loss": 2.7923, "step": 16840 }, { "epoch": 0.5131910906333822, "grad_norm": 30.5, "learning_rate": 0.00011059065425997714, "loss": 2.7811, "step": 16850 }, { "epoch": 0.513495655078862, "grad_norm": 32.5, "learning_rate": 0.00011050183507248937, "loss": 2.7493, "step": 16860 }, { "epoch": 0.5138002195243417, "grad_norm": 33.25, "learning_rate": 0.00011041301539623043, "loss": 2.7603, "step": 16870 }, { "epoch": 0.5141047839698215, "grad_norm": 30.75, "learning_rate": 0.00011032419531770779, "loss": 2.757, "step": 16880 }, { "epoch": 0.5144093484153012, "grad_norm": 30.625, "learning_rate": 0.00011023537492342934, "loss": 2.7585, "step": 16890 }, { "epoch": 0.5147139128607809, "grad_norm": 31.875, "learning_rate": 0.00011014655429990337, "loss": 2.7953, "step": 16900 }, { "epoch": 0.5150184773062607, "grad_norm": 31.375, "learning_rate": 0.00011005773353363822, "loss": 2.7857, "step": 16910 }, { "epoch": 0.5153230417517405, "grad_norm": 31.125, "learning_rate": 0.00010996891271114247, "loss": 2.7633, "step": 16920 }, { "epoch": 0.5156276061972203, "grad_norm": 31.875, "learning_rate": 0.00010988009191892473, "loss": 2.765, "step": 16930 }, { "epoch": 0.5159321706427, "grad_norm": 30.5, "learning_rate": 0.00010979127124349357, "loss": 2.7644, "step": 16940 }, { "epoch": 0.5162367350881797, "grad_norm": 32.75, "learning_rate": 0.00010970245077135752, "loss": 2.7864, "step": 16950 }, { "epoch": 0.5165412995336595, "grad_norm": 31.75, "learning_rate": 0.00010961363058902481, "loss": 2.7609, "step": 16960 }, { "epoch": 0.5168458639791392, "grad_norm": 30.625, "learning_rate": 0.00010952481078300343, "loss": 2.7773, "step": 16970 }, { "epoch": 0.517150428424619, "grad_norm": 32.5, "learning_rate": 0.00010943599143980104, "loss": 2.7976, "step": 16980 }, { "epoch": 0.5174549928700988, "grad_norm": 31.25, "learning_rate": 0.00010934717264592476, "loss": 2.7682, "step": 16990 }, { "epoch": 0.5177595573155785, "grad_norm": 30.75, "learning_rate": 0.00010925835448788129, "loss": 2.7903, "step": 17000 }, { "epoch": 0.5180641217610582, "grad_norm": 30.75, "learning_rate": 0.00010916953705217661, "loss": 2.7794, "step": 17010 }, { "epoch": 0.518368686206538, "grad_norm": 32.75, "learning_rate": 0.0001090807204253161, "loss": 2.776, "step": 17020 }, { "epoch": 0.5186732506520177, "grad_norm": 32.75, "learning_rate": 0.00010899190469380418, "loss": 2.7219, "step": 17030 }, { "epoch": 0.5189778150974975, "grad_norm": 32.25, "learning_rate": 0.00010890308994414458, "loss": 2.759, "step": 17040 }, { "epoch": 0.5192823795429773, "grad_norm": 31.75, "learning_rate": 0.00010881427626283998, "loss": 2.7788, "step": 17050 }, { "epoch": 0.5195869439884571, "grad_norm": 31.0, "learning_rate": 0.00010872546373639205, "loss": 2.7908, "step": 17060 }, { "epoch": 0.5198915084339367, "grad_norm": 33.5, "learning_rate": 0.0001086366524513013, "loss": 2.7509, "step": 17070 }, { "epoch": 0.5201960728794165, "grad_norm": 30.875, "learning_rate": 0.00010854784249406711, "loss": 2.7547, "step": 17080 }, { "epoch": 0.5205006373248963, "grad_norm": 33.0, "learning_rate": 0.00010845903395118746, "loss": 2.79, "step": 17090 }, { "epoch": 0.520805201770376, "grad_norm": 32.25, "learning_rate": 0.00010837022690915903, "loss": 2.784, "step": 17100 }, { "epoch": 0.5211097662158558, "grad_norm": 31.625, "learning_rate": 0.00010828142145447703, "loss": 2.7396, "step": 17110 }, { "epoch": 0.5214143306613355, "grad_norm": 31.0, "learning_rate": 0.00010819261767363508, "loss": 2.7952, "step": 17120 }, { "epoch": 0.5217188951068152, "grad_norm": 31.75, "learning_rate": 0.00010810381565312522, "loss": 2.7619, "step": 17130 }, { "epoch": 0.522023459552295, "grad_norm": 31.25, "learning_rate": 0.00010801501547943771, "loss": 2.7692, "step": 17140 }, { "epoch": 0.5223280239977748, "grad_norm": 31.5, "learning_rate": 0.00010792621723906111, "loss": 2.7898, "step": 17150 }, { "epoch": 0.5226325884432546, "grad_norm": 31.375, "learning_rate": 0.00010783742101848202, "loss": 2.7532, "step": 17160 }, { "epoch": 0.5229371528887343, "grad_norm": 31.0, "learning_rate": 0.00010774862690418506, "loss": 2.7363, "step": 17170 }, { "epoch": 0.523241717334214, "grad_norm": 32.0, "learning_rate": 0.00010765983498265289, "loss": 2.7524, "step": 17180 }, { "epoch": 0.5235462817796938, "grad_norm": 31.125, "learning_rate": 0.00010757104534036593, "loss": 2.7553, "step": 17190 }, { "epoch": 0.5238508462251735, "grad_norm": 30.625, "learning_rate": 0.00010748225806380248, "loss": 2.7767, "step": 17200 }, { "epoch": 0.5241554106706533, "grad_norm": 30.5, "learning_rate": 0.0001073934732394384, "loss": 2.7527, "step": 17210 }, { "epoch": 0.5244599751161331, "grad_norm": 31.375, "learning_rate": 0.00010730469095374733, "loss": 2.8082, "step": 17220 }, { "epoch": 0.5247645395616128, "grad_norm": 31.375, "learning_rate": 0.0001072159112932003, "loss": 2.7791, "step": 17230 }, { "epoch": 0.5250691040070925, "grad_norm": 31.875, "learning_rate": 0.00010712713434426586, "loss": 2.7425, "step": 17240 }, { "epoch": 0.5253736684525723, "grad_norm": 32.5, "learning_rate": 0.00010703836019340989, "loss": 2.7593, "step": 17250 }, { "epoch": 0.525678232898052, "grad_norm": 33.0, "learning_rate": 0.00010694958892709555, "loss": 2.7603, "step": 17260 }, { "epoch": 0.5259827973435318, "grad_norm": 30.375, "learning_rate": 0.00010686082063178315, "loss": 2.7669, "step": 17270 }, { "epoch": 0.5262873617890116, "grad_norm": 31.75, "learning_rate": 0.0001067720553939302, "loss": 2.7693, "step": 17280 }, { "epoch": 0.5265919262344914, "grad_norm": 32.25, "learning_rate": 0.00010668329329999118, "loss": 2.7542, "step": 17290 }, { "epoch": 0.526896490679971, "grad_norm": 32.25, "learning_rate": 0.00010659453443641745, "loss": 2.8371, "step": 17300 }, { "epoch": 0.5272010551254508, "grad_norm": 30.5, "learning_rate": 0.00010650577888965732, "loss": 2.7877, "step": 17310 }, { "epoch": 0.5275056195709306, "grad_norm": 31.25, "learning_rate": 0.00010641702674615584, "loss": 2.7848, "step": 17320 }, { "epoch": 0.5278101840164103, "grad_norm": 32.0, "learning_rate": 0.00010632827809235473, "loss": 2.7635, "step": 17330 }, { "epoch": 0.5281147484618901, "grad_norm": 32.75, "learning_rate": 0.00010623953301469228, "loss": 2.7555, "step": 17340 }, { "epoch": 0.5284193129073698, "grad_norm": 31.625, "learning_rate": 0.00010615079159960332, "loss": 2.7404, "step": 17350 }, { "epoch": 0.5287238773528495, "grad_norm": 31.125, "learning_rate": 0.0001060620539335192, "loss": 2.7909, "step": 17360 }, { "epoch": 0.5290284417983293, "grad_norm": 31.375, "learning_rate": 0.0001059733201028675, "loss": 2.7586, "step": 17370 }, { "epoch": 0.5293330062438091, "grad_norm": 31.25, "learning_rate": 0.00010588459019407213, "loss": 2.7538, "step": 17380 }, { "epoch": 0.5296375706892889, "grad_norm": 30.375, "learning_rate": 0.00010579586429355311, "loss": 2.7509, "step": 17390 }, { "epoch": 0.5299421351347686, "grad_norm": 30.875, "learning_rate": 0.00010570714248772664, "loss": 2.7952, "step": 17400 }, { "epoch": 0.5302466995802483, "grad_norm": 32.75, "learning_rate": 0.0001056184248630049, "loss": 2.807, "step": 17410 }, { "epoch": 0.530551264025728, "grad_norm": 32.5, "learning_rate": 0.00010552971150579602, "loss": 2.7388, "step": 17420 }, { "epoch": 0.5308558284712078, "grad_norm": 31.75, "learning_rate": 0.00010544100250250391, "loss": 2.7481, "step": 17430 }, { "epoch": 0.5311603929166876, "grad_norm": 35.0, "learning_rate": 0.00010535229793952825, "loss": 2.7825, "step": 17440 }, { "epoch": 0.5314649573621674, "grad_norm": 32.75, "learning_rate": 0.00010526359790326448, "loss": 2.7794, "step": 17450 }, { "epoch": 0.5317695218076471, "grad_norm": 32.0, "learning_rate": 0.00010517490248010353, "loss": 2.7441, "step": 17460 }, { "epoch": 0.5320740862531268, "grad_norm": 31.375, "learning_rate": 0.00010508621175643189, "loss": 2.7637, "step": 17470 }, { "epoch": 0.5323786506986066, "grad_norm": 30.625, "learning_rate": 0.00010499752581863149, "loss": 2.764, "step": 17480 }, { "epoch": 0.5326832151440863, "grad_norm": 32.5, "learning_rate": 0.00010490884475307956, "loss": 2.7735, "step": 17490 }, { "epoch": 0.5329877795895661, "grad_norm": 32.25, "learning_rate": 0.00010482016864614856, "loss": 2.7995, "step": 17500 }, { "epoch": 0.5332923440350459, "grad_norm": 32.0, "learning_rate": 0.00010473149758420622, "loss": 2.7963, "step": 17510 }, { "epoch": 0.5335969084805257, "grad_norm": 31.0, "learning_rate": 0.00010464283165361526, "loss": 2.7665, "step": 17520 }, { "epoch": 0.5339014729260053, "grad_norm": 31.625, "learning_rate": 0.00010455417094073343, "loss": 2.7809, "step": 17530 }, { "epoch": 0.5342060373714851, "grad_norm": 31.375, "learning_rate": 0.00010446551553191342, "loss": 2.7826, "step": 17540 }, { "epoch": 0.5345106018169649, "grad_norm": 33.75, "learning_rate": 0.00010437686551350272, "loss": 2.7682, "step": 17550 }, { "epoch": 0.5348151662624446, "grad_norm": 31.75, "learning_rate": 0.00010428822097184361, "loss": 2.7527, "step": 17560 }, { "epoch": 0.5351197307079244, "grad_norm": 30.25, "learning_rate": 0.000104199581993273, "loss": 2.7506, "step": 17570 }, { "epoch": 0.5354242951534041, "grad_norm": 33.5, "learning_rate": 0.00010411094866412241, "loss": 2.7859, "step": 17580 }, { "epoch": 0.5357288595988838, "grad_norm": 31.5, "learning_rate": 0.00010402232107071781, "loss": 2.8021, "step": 17590 }, { "epoch": 0.5360334240443636, "grad_norm": 31.375, "learning_rate": 0.00010393369929937964, "loss": 2.7731, "step": 17600 }, { "epoch": 0.5363379884898434, "grad_norm": 31.125, "learning_rate": 0.00010384508343642265, "loss": 2.7813, "step": 17610 }, { "epoch": 0.5366425529353231, "grad_norm": 30.75, "learning_rate": 0.00010375647356815583, "loss": 2.7591, "step": 17620 }, { "epoch": 0.5369471173808029, "grad_norm": 32.25, "learning_rate": 0.00010366786978088234, "loss": 2.8, "step": 17630 }, { "epoch": 0.5372516818262826, "grad_norm": 31.0, "learning_rate": 0.00010357927216089937, "loss": 2.771, "step": 17640 }, { "epoch": 0.5375562462717624, "grad_norm": 32.5, "learning_rate": 0.0001034906807944982, "loss": 2.754, "step": 17650 }, { "epoch": 0.5378608107172421, "grad_norm": 31.0, "learning_rate": 0.00010340209576796391, "loss": 2.7671, "step": 17660 }, { "epoch": 0.5381653751627219, "grad_norm": 32.75, "learning_rate": 0.0001033135171675755, "loss": 2.7954, "step": 17670 }, { "epoch": 0.5384699396082017, "grad_norm": 31.625, "learning_rate": 0.00010322494507960565, "loss": 2.7531, "step": 17680 }, { "epoch": 0.5387745040536814, "grad_norm": 30.625, "learning_rate": 0.00010313637959032072, "loss": 2.804, "step": 17690 }, { "epoch": 0.5390790684991611, "grad_norm": 32.0, "learning_rate": 0.00010304782078598064, "loss": 2.7541, "step": 17700 }, { "epoch": 0.5393836329446409, "grad_norm": 31.375, "learning_rate": 0.00010295926875283883, "loss": 2.7872, "step": 17710 }, { "epoch": 0.5396881973901206, "grad_norm": 31.25, "learning_rate": 0.00010287072357714211, "loss": 2.7472, "step": 17720 }, { "epoch": 0.5399927618356004, "grad_norm": 32.0, "learning_rate": 0.0001027821853451306, "loss": 2.7467, "step": 17730 }, { "epoch": 0.5402973262810802, "grad_norm": 32.5, "learning_rate": 0.00010269365414303773, "loss": 2.7347, "step": 17740 }, { "epoch": 0.54060189072656, "grad_norm": 30.125, "learning_rate": 0.00010260513005709, "loss": 2.7196, "step": 17750 }, { "epoch": 0.5409064551720396, "grad_norm": 30.75, "learning_rate": 0.00010251661317350705, "loss": 2.7677, "step": 17760 }, { "epoch": 0.5412110196175194, "grad_norm": 32.0, "learning_rate": 0.00010242810357850141, "loss": 2.7588, "step": 17770 }, { "epoch": 0.5415155840629992, "grad_norm": 31.375, "learning_rate": 0.00010233960135827863, "loss": 2.7615, "step": 17780 }, { "epoch": 0.5418201485084789, "grad_norm": 30.75, "learning_rate": 0.00010225110659903696, "loss": 2.7293, "step": 17790 }, { "epoch": 0.5421247129539587, "grad_norm": 33.0, "learning_rate": 0.00010216261938696751, "loss": 2.7528, "step": 17800 }, { "epoch": 0.5424292773994384, "grad_norm": 31.0, "learning_rate": 0.00010207413980825393, "loss": 2.7771, "step": 17810 }, { "epoch": 0.5427338418449181, "grad_norm": 32.5, "learning_rate": 0.00010198566794907247, "loss": 2.7591, "step": 17820 }, { "epoch": 0.5430384062903979, "grad_norm": 34.25, "learning_rate": 0.00010189720389559185, "loss": 2.7504, "step": 17830 }, { "epoch": 0.5433429707358777, "grad_norm": 32.5, "learning_rate": 0.00010180874773397325, "loss": 2.7706, "step": 17840 }, { "epoch": 0.5436475351813574, "grad_norm": 32.0, "learning_rate": 0.00010172029955037009, "loss": 2.7857, "step": 17850 }, { "epoch": 0.5439520996268372, "grad_norm": 31.5, "learning_rate": 0.00010163185943092804, "loss": 2.7451, "step": 17860 }, { "epoch": 0.5442566640723169, "grad_norm": 31.5, "learning_rate": 0.00010154342746178494, "loss": 2.7429, "step": 17870 }, { "epoch": 0.5445612285177966, "grad_norm": 32.75, "learning_rate": 0.00010145500372907067, "loss": 2.7478, "step": 17880 }, { "epoch": 0.5448657929632764, "grad_norm": 32.5, "learning_rate": 0.00010136658831890709, "loss": 2.7636, "step": 17890 }, { "epoch": 0.5451703574087562, "grad_norm": 31.5, "learning_rate": 0.00010127818131740796, "loss": 2.7708, "step": 17900 }, { "epoch": 0.545474921854236, "grad_norm": 31.75, "learning_rate": 0.00010118978281067885, "loss": 2.7864, "step": 17910 }, { "epoch": 0.5457794862997157, "grad_norm": 31.0, "learning_rate": 0.00010110139288481705, "loss": 2.7739, "step": 17920 }, { "epoch": 0.5460840507451954, "grad_norm": 31.0, "learning_rate": 0.0001010130116259115, "loss": 2.7804, "step": 17930 }, { "epoch": 0.5463886151906752, "grad_norm": 32.25, "learning_rate": 0.00010092463912004271, "loss": 2.7601, "step": 17940 }, { "epoch": 0.5466931796361549, "grad_norm": 33.25, "learning_rate": 0.00010083627545328263, "loss": 2.7661, "step": 17950 }, { "epoch": 0.5469977440816347, "grad_norm": 34.0, "learning_rate": 0.00010074792071169459, "loss": 2.7711, "step": 17960 }, { "epoch": 0.5473023085271145, "grad_norm": 31.625, "learning_rate": 0.00010065957498133331, "loss": 2.7758, "step": 17970 }, { "epoch": 0.5476068729725942, "grad_norm": 31.125, "learning_rate": 0.00010057123834824466, "loss": 2.7602, "step": 17980 }, { "epoch": 0.5479114374180739, "grad_norm": 32.25, "learning_rate": 0.00010048291089846567, "loss": 2.7538, "step": 17990 }, { "epoch": 0.5482160018635537, "grad_norm": 32.25, "learning_rate": 0.00010039459271802444, "loss": 2.7548, "step": 18000 }, { "epoch": 0.5485205663090335, "grad_norm": 31.125, "learning_rate": 0.00010030628389294001, "loss": 2.7545, "step": 18010 }, { "epoch": 0.5488251307545132, "grad_norm": 32.0, "learning_rate": 0.00010021798450922235, "loss": 2.7913, "step": 18020 }, { "epoch": 0.549129695199993, "grad_norm": 31.125, "learning_rate": 0.00010012969465287216, "loss": 2.7442, "step": 18030 }, { "epoch": 0.5494342596454727, "grad_norm": 31.625, "learning_rate": 0.00010004141440988098, "loss": 2.7486, "step": 18040 }, { "epoch": 0.5497388240909524, "grad_norm": 33.5, "learning_rate": 9.995314386623087e-05, "loss": 2.7518, "step": 18050 }, { "epoch": 0.5500433885364322, "grad_norm": 31.5, "learning_rate": 9.98648831078945e-05, "loss": 2.7992, "step": 18060 }, { "epoch": 0.550347952981912, "grad_norm": 32.25, "learning_rate": 9.977663222083503e-05, "loss": 2.7971, "step": 18070 }, { "epoch": 0.5506525174273917, "grad_norm": 31.125, "learning_rate": 9.968839129100593e-05, "loss": 2.7622, "step": 18080 }, { "epoch": 0.5509570818728715, "grad_norm": 32.75, "learning_rate": 9.960016040435108e-05, "loss": 2.786, "step": 18090 }, { "epoch": 0.5512616463183512, "grad_norm": 31.625, "learning_rate": 9.951193964680445e-05, "loss": 2.7459, "step": 18100 }, { "epoch": 0.5515662107638309, "grad_norm": 30.75, "learning_rate": 9.942372910429025e-05, "loss": 2.7654, "step": 18110 }, { "epoch": 0.5518707752093107, "grad_norm": 32.5, "learning_rate": 9.933552886272271e-05, "loss": 2.7145, "step": 18120 }, { "epoch": 0.5521753396547905, "grad_norm": 31.75, "learning_rate": 9.924733900800603e-05, "loss": 2.7887, "step": 18130 }, { "epoch": 0.5524799041002703, "grad_norm": 32.0, "learning_rate": 9.91591596260342e-05, "loss": 2.7564, "step": 18140 }, { "epoch": 0.55278446854575, "grad_norm": 32.25, "learning_rate": 9.907099080269118e-05, "loss": 2.7733, "step": 18150 }, { "epoch": 0.5530890329912297, "grad_norm": 32.75, "learning_rate": 9.89828326238505e-05, "loss": 2.7637, "step": 18160 }, { "epoch": 0.5533935974367095, "grad_norm": 31.25, "learning_rate": 9.889468517537545e-05, "loss": 2.7867, "step": 18170 }, { "epoch": 0.5536981618821892, "grad_norm": 31.0, "learning_rate": 9.88065485431188e-05, "loss": 2.7695, "step": 18180 }, { "epoch": 0.554002726327669, "grad_norm": 31.375, "learning_rate": 9.87184228129227e-05, "loss": 2.7412, "step": 18190 }, { "epoch": 0.5543072907731488, "grad_norm": 32.5, "learning_rate": 9.863030807061882e-05, "loss": 2.7752, "step": 18200 }, { "epoch": 0.5546118552186285, "grad_norm": 32.75, "learning_rate": 9.85422044020281e-05, "loss": 2.7342, "step": 18210 }, { "epoch": 0.5549164196641082, "grad_norm": 31.625, "learning_rate": 9.845411189296063e-05, "loss": 2.7395, "step": 18220 }, { "epoch": 0.555220984109588, "grad_norm": 31.875, "learning_rate": 9.83660306292157e-05, "loss": 2.7545, "step": 18230 }, { "epoch": 0.5555255485550677, "grad_norm": 31.125, "learning_rate": 9.82779606965816e-05, "loss": 2.7931, "step": 18240 }, { "epoch": 0.5558301130005475, "grad_norm": 33.0, "learning_rate": 9.818990218083562e-05, "loss": 2.766, "step": 18250 }, { "epoch": 0.5561346774460273, "grad_norm": 31.875, "learning_rate": 9.810185516774386e-05, "loss": 2.7573, "step": 18260 }, { "epoch": 0.556439241891507, "grad_norm": 32.75, "learning_rate": 9.801381974306137e-05, "loss": 2.7598, "step": 18270 }, { "epoch": 0.5567438063369867, "grad_norm": 30.5, "learning_rate": 9.792579599253172e-05, "loss": 2.726, "step": 18280 }, { "epoch": 0.5570483707824665, "grad_norm": 32.5, "learning_rate": 9.783778400188726e-05, "loss": 2.7603, "step": 18290 }, { "epoch": 0.5573529352279463, "grad_norm": 30.875, "learning_rate": 9.77497838568488e-05, "loss": 2.7909, "step": 18300 }, { "epoch": 0.557657499673426, "grad_norm": 31.375, "learning_rate": 9.766179564312566e-05, "loss": 2.7367, "step": 18310 }, { "epoch": 0.5579620641189058, "grad_norm": 31.625, "learning_rate": 9.75738194464155e-05, "loss": 2.7419, "step": 18320 }, { "epoch": 0.5582666285643855, "grad_norm": 31.75, "learning_rate": 9.748585535240435e-05, "loss": 2.7515, "step": 18330 }, { "epoch": 0.5585711930098652, "grad_norm": 31.375, "learning_rate": 9.739790344676633e-05, "loss": 2.7537, "step": 18340 }, { "epoch": 0.558875757455345, "grad_norm": 30.625, "learning_rate": 9.73099638151638e-05, "loss": 2.7654, "step": 18350 }, { "epoch": 0.5591803219008248, "grad_norm": 32.5, "learning_rate": 9.72220365432471e-05, "loss": 2.78, "step": 18360 }, { "epoch": 0.5594848863463046, "grad_norm": 32.25, "learning_rate": 9.713412171665457e-05, "loss": 2.7738, "step": 18370 }, { "epoch": 0.5597894507917843, "grad_norm": 32.75, "learning_rate": 9.704621942101247e-05, "loss": 2.7442, "step": 18380 }, { "epoch": 0.560094015237264, "grad_norm": 31.5, "learning_rate": 9.695832974193469e-05, "loss": 2.786, "step": 18390 }, { "epoch": 0.5603985796827438, "grad_norm": 31.375, "learning_rate": 9.6870452765023e-05, "loss": 2.7563, "step": 18400 }, { "epoch": 0.5607031441282235, "grad_norm": 32.75, "learning_rate": 9.678258857586672e-05, "loss": 2.7276, "step": 18410 }, { "epoch": 0.5610077085737033, "grad_norm": 32.0, "learning_rate": 9.669473726004282e-05, "loss": 2.7544, "step": 18420 }, { "epoch": 0.5613122730191831, "grad_norm": 31.5, "learning_rate": 9.660689890311552e-05, "loss": 2.7643, "step": 18430 }, { "epoch": 0.5616168374646628, "grad_norm": 32.5, "learning_rate": 9.651907359063664e-05, "loss": 2.7417, "step": 18440 }, { "epoch": 0.5619214019101425, "grad_norm": 31.5, "learning_rate": 9.643126140814513e-05, "loss": 2.7516, "step": 18450 }, { "epoch": 0.5622259663556223, "grad_norm": 32.25, "learning_rate": 9.634346244116728e-05, "loss": 2.7494, "step": 18460 }, { "epoch": 0.562530530801102, "grad_norm": 30.25, "learning_rate": 9.625567677521645e-05, "loss": 2.7342, "step": 18470 }, { "epoch": 0.5628350952465818, "grad_norm": 31.25, "learning_rate": 9.616790449579303e-05, "loss": 2.7369, "step": 18480 }, { "epoch": 0.5631396596920616, "grad_norm": 31.25, "learning_rate": 9.608014568838435e-05, "loss": 2.7789, "step": 18490 }, { "epoch": 0.5634442241375412, "grad_norm": 32.0, "learning_rate": 9.599240043846471e-05, "loss": 2.7778, "step": 18500 }, { "epoch": 0.563748788583021, "grad_norm": 32.75, "learning_rate": 9.590466883149516e-05, "loss": 2.759, "step": 18510 }, { "epoch": 0.5640533530285008, "grad_norm": 30.5, "learning_rate": 9.581695095292343e-05, "loss": 2.7307, "step": 18520 }, { "epoch": 0.5643579174739806, "grad_norm": 32.75, "learning_rate": 9.572924688818388e-05, "loss": 2.8038, "step": 18530 }, { "epoch": 0.5646624819194603, "grad_norm": 32.5, "learning_rate": 9.564155672269746e-05, "loss": 2.7565, "step": 18540 }, { "epoch": 0.5649670463649401, "grad_norm": 32.0, "learning_rate": 9.555388054187158e-05, "loss": 2.7458, "step": 18550 }, { "epoch": 0.5652716108104198, "grad_norm": 31.75, "learning_rate": 9.546621843110001e-05, "loss": 2.7532, "step": 18560 }, { "epoch": 0.5655761752558995, "grad_norm": 33.25, "learning_rate": 9.53785704757628e-05, "loss": 2.7633, "step": 18570 }, { "epoch": 0.5658807397013793, "grad_norm": 31.125, "learning_rate": 9.529093676122622e-05, "loss": 2.7577, "step": 18580 }, { "epoch": 0.5661853041468591, "grad_norm": 32.0, "learning_rate": 9.520331737284272e-05, "loss": 2.7451, "step": 18590 }, { "epoch": 0.5664898685923389, "grad_norm": 30.875, "learning_rate": 9.511571239595074e-05, "loss": 2.7679, "step": 18600 }, { "epoch": 0.5667944330378186, "grad_norm": 31.375, "learning_rate": 9.502812191587469e-05, "loss": 2.7925, "step": 18610 }, { "epoch": 0.5670989974832983, "grad_norm": 31.75, "learning_rate": 9.494054601792488e-05, "loss": 2.7551, "step": 18620 }, { "epoch": 0.567403561928778, "grad_norm": 31.75, "learning_rate": 9.485298478739739e-05, "loss": 2.773, "step": 18630 }, { "epoch": 0.5677081263742578, "grad_norm": 31.75, "learning_rate": 9.476543830957405e-05, "loss": 2.7702, "step": 18640 }, { "epoch": 0.5680126908197376, "grad_norm": 31.25, "learning_rate": 9.467790666972232e-05, "loss": 2.7584, "step": 18650 }, { "epoch": 0.5683172552652174, "grad_norm": 31.0, "learning_rate": 9.459038995309517e-05, "loss": 2.7281, "step": 18660 }, { "epoch": 0.5686218197106971, "grad_norm": 32.0, "learning_rate": 9.45028882449311e-05, "loss": 2.7435, "step": 18670 }, { "epoch": 0.5689263841561768, "grad_norm": 31.375, "learning_rate": 9.441540163045388e-05, "loss": 2.7731, "step": 18680 }, { "epoch": 0.5692309486016566, "grad_norm": 32.5, "learning_rate": 9.43279301948727e-05, "loss": 2.7377, "step": 18690 }, { "epoch": 0.5695355130471363, "grad_norm": 31.375, "learning_rate": 9.42404740233819e-05, "loss": 2.7582, "step": 18700 }, { "epoch": 0.5698400774926161, "grad_norm": 30.875, "learning_rate": 9.4153033201161e-05, "loss": 2.7607, "step": 18710 }, { "epoch": 0.5701446419380959, "grad_norm": 30.75, "learning_rate": 9.40656078133745e-05, "loss": 2.7418, "step": 18720 }, { "epoch": 0.5704492063835755, "grad_norm": 31.0, "learning_rate": 9.39781979451719e-05, "loss": 2.7545, "step": 18730 }, { "epoch": 0.5707537708290553, "grad_norm": 32.75, "learning_rate": 9.389080368168763e-05, "loss": 2.779, "step": 18740 }, { "epoch": 0.5710583352745351, "grad_norm": 30.875, "learning_rate": 9.380342510804087e-05, "loss": 2.7695, "step": 18750 }, { "epoch": 0.5713628997200149, "grad_norm": 31.0, "learning_rate": 9.371606230933554e-05, "loss": 2.742, "step": 18760 }, { "epoch": 0.5716674641654946, "grad_norm": 33.25, "learning_rate": 9.362871537066016e-05, "loss": 2.7662, "step": 18770 }, { "epoch": 0.5719720286109744, "grad_norm": 31.875, "learning_rate": 9.354138437708786e-05, "loss": 2.7671, "step": 18780 }, { "epoch": 0.5722765930564541, "grad_norm": 31.75, "learning_rate": 9.345406941367619e-05, "loss": 2.7645, "step": 18790 }, { "epoch": 0.5725811575019338, "grad_norm": 32.5, "learning_rate": 9.336677056546712e-05, "loss": 2.7772, "step": 18800 }, { "epoch": 0.5728857219474136, "grad_norm": 31.125, "learning_rate": 9.327948791748689e-05, "loss": 2.7867, "step": 18810 }, { "epoch": 0.5731902863928934, "grad_norm": 30.5, "learning_rate": 9.319222155474601e-05, "loss": 2.7774, "step": 18820 }, { "epoch": 0.5734948508383731, "grad_norm": 32.25, "learning_rate": 9.310497156223908e-05, "loss": 2.7611, "step": 18830 }, { "epoch": 0.5737994152838529, "grad_norm": 30.5, "learning_rate": 9.301773802494482e-05, "loss": 2.7629, "step": 18840 }, { "epoch": 0.5741039797293326, "grad_norm": 33.25, "learning_rate": 9.293052102782582e-05, "loss": 2.749, "step": 18850 }, { "epoch": 0.5744085441748124, "grad_norm": 31.625, "learning_rate": 9.284332065582866e-05, "loss": 2.7423, "step": 18860 }, { "epoch": 0.5747131086202921, "grad_norm": 32.75, "learning_rate": 9.275613699388366e-05, "loss": 2.7927, "step": 18870 }, { "epoch": 0.5750176730657719, "grad_norm": 31.125, "learning_rate": 9.266897012690494e-05, "loss": 2.7604, "step": 18880 }, { "epoch": 0.5753222375112517, "grad_norm": 33.25, "learning_rate": 9.25818201397902e-05, "loss": 2.7681, "step": 18890 }, { "epoch": 0.5756268019567314, "grad_norm": 32.0, "learning_rate": 9.249468711742069e-05, "loss": 2.7437, "step": 18900 }, { "epoch": 0.5759313664022111, "grad_norm": 31.0, "learning_rate": 9.240757114466119e-05, "loss": 2.7603, "step": 18910 }, { "epoch": 0.5762359308476909, "grad_norm": 30.75, "learning_rate": 9.23204723063598e-05, "loss": 2.7503, "step": 18920 }, { "epoch": 0.5765404952931706, "grad_norm": 31.0, "learning_rate": 9.223339068734807e-05, "loss": 2.7878, "step": 18930 }, { "epoch": 0.5768450597386504, "grad_norm": 32.0, "learning_rate": 9.214632637244061e-05, "loss": 2.7714, "step": 18940 }, { "epoch": 0.5771496241841302, "grad_norm": 31.0, "learning_rate": 9.205927944643526e-05, "loss": 2.7772, "step": 18950 }, { "epoch": 0.5774541886296098, "grad_norm": 31.625, "learning_rate": 9.197224999411296e-05, "loss": 2.754, "step": 18960 }, { "epoch": 0.5777587530750896, "grad_norm": 33.25, "learning_rate": 9.188523810023754e-05, "loss": 2.7344, "step": 18970 }, { "epoch": 0.5780633175205694, "grad_norm": 32.75, "learning_rate": 9.179824384955584e-05, "loss": 2.7661, "step": 18980 }, { "epoch": 0.5783678819660492, "grad_norm": 37.25, "learning_rate": 9.171126732679738e-05, "loss": 2.7794, "step": 18990 }, { "epoch": 0.5786724464115289, "grad_norm": 34.25, "learning_rate": 9.162430861667458e-05, "loss": 2.7549, "step": 19000 }, { "epoch": 0.5789770108570087, "grad_norm": 32.25, "learning_rate": 9.153736780388227e-05, "loss": 2.7534, "step": 19010 }, { "epoch": 0.5792815753024884, "grad_norm": 33.5, "learning_rate": 9.145044497309814e-05, "loss": 2.7697, "step": 19020 }, { "epoch": 0.5795861397479681, "grad_norm": 33.0, "learning_rate": 9.136354020898215e-05, "loss": 2.7298, "step": 19030 }, { "epoch": 0.5798907041934479, "grad_norm": 34.0, "learning_rate": 9.127665359617676e-05, "loss": 2.7774, "step": 19040 }, { "epoch": 0.5801952686389277, "grad_norm": 31.25, "learning_rate": 9.118978521930673e-05, "loss": 2.7601, "step": 19050 }, { "epoch": 0.5804998330844074, "grad_norm": 31.125, "learning_rate": 9.110293516297903e-05, "loss": 2.7362, "step": 19060 }, { "epoch": 0.5808043975298872, "grad_norm": 31.25, "learning_rate": 9.101610351178284e-05, "loss": 2.7784, "step": 19070 }, { "epoch": 0.5811089619753669, "grad_norm": 31.25, "learning_rate": 9.092929035028938e-05, "loss": 2.7401, "step": 19080 }, { "epoch": 0.5814135264208466, "grad_norm": 32.75, "learning_rate": 9.08424957630519e-05, "loss": 2.8023, "step": 19090 }, { "epoch": 0.5817180908663264, "grad_norm": 32.0, "learning_rate": 9.075571983460546e-05, "loss": 2.7328, "step": 19100 }, { "epoch": 0.5820226553118062, "grad_norm": 31.875, "learning_rate": 9.066896264946706e-05, "loss": 2.7719, "step": 19110 }, { "epoch": 0.582327219757286, "grad_norm": 31.0, "learning_rate": 9.058222429213541e-05, "loss": 2.7326, "step": 19120 }, { "epoch": 0.5826317842027657, "grad_norm": 33.0, "learning_rate": 9.049550484709084e-05, "loss": 2.7702, "step": 19130 }, { "epoch": 0.5829363486482454, "grad_norm": 31.375, "learning_rate": 9.040880439879529e-05, "loss": 2.741, "step": 19140 }, { "epoch": 0.5832409130937252, "grad_norm": 30.625, "learning_rate": 9.032212303169224e-05, "loss": 2.7604, "step": 19150 }, { "epoch": 0.5835454775392049, "grad_norm": 31.875, "learning_rate": 9.023546083020649e-05, "loss": 2.7502, "step": 19160 }, { "epoch": 0.5838500419846847, "grad_norm": 31.625, "learning_rate": 9.014881787874427e-05, "loss": 2.7699, "step": 19170 }, { "epoch": 0.5841546064301645, "grad_norm": 31.25, "learning_rate": 9.006219426169303e-05, "loss": 2.7582, "step": 19180 }, { "epoch": 0.5844591708756441, "grad_norm": 31.875, "learning_rate": 8.99755900634213e-05, "loss": 2.7123, "step": 19190 }, { "epoch": 0.5847637353211239, "grad_norm": 31.75, "learning_rate": 8.98890053682789e-05, "loss": 2.7651, "step": 19200 }, { "epoch": 0.5850682997666037, "grad_norm": 31.5, "learning_rate": 8.98024402605964e-05, "loss": 2.7515, "step": 19210 }, { "epoch": 0.5853728642120835, "grad_norm": 39.75, "learning_rate": 8.971589482468552e-05, "loss": 2.7723, "step": 19220 }, { "epoch": 0.5856774286575632, "grad_norm": 32.0, "learning_rate": 8.962936914483868e-05, "loss": 2.7525, "step": 19230 }, { "epoch": 0.585981993103043, "grad_norm": 32.5, "learning_rate": 8.954286330532908e-05, "loss": 2.7883, "step": 19240 }, { "epoch": 0.5862865575485227, "grad_norm": 33.0, "learning_rate": 8.945637739041066e-05, "loss": 2.7501, "step": 19250 }, { "epoch": 0.5865911219940024, "grad_norm": 29.75, "learning_rate": 8.936991148431788e-05, "loss": 2.7434, "step": 19260 }, { "epoch": 0.5868956864394822, "grad_norm": 31.625, "learning_rate": 8.92834656712658e-05, "loss": 2.7629, "step": 19270 }, { "epoch": 0.587200250884962, "grad_norm": 32.5, "learning_rate": 8.919704003544977e-05, "loss": 2.7577, "step": 19280 }, { "epoch": 0.5875048153304417, "grad_norm": 30.125, "learning_rate": 8.91106346610456e-05, "loss": 2.7605, "step": 19290 }, { "epoch": 0.5878093797759215, "grad_norm": 31.25, "learning_rate": 8.902424963220936e-05, "loss": 2.7281, "step": 19300 }, { "epoch": 0.5881139442214012, "grad_norm": 32.25, "learning_rate": 8.89378850330773e-05, "loss": 2.7368, "step": 19310 }, { "epoch": 0.588418508666881, "grad_norm": 32.25, "learning_rate": 8.885154094776569e-05, "loss": 2.7397, "step": 19320 }, { "epoch": 0.5887230731123607, "grad_norm": 33.0, "learning_rate": 8.876521746037093e-05, "loss": 2.751, "step": 19330 }, { "epoch": 0.5890276375578405, "grad_norm": 33.0, "learning_rate": 8.867891465496927e-05, "loss": 2.774, "step": 19340 }, { "epoch": 0.5893322020033203, "grad_norm": 31.625, "learning_rate": 8.85926326156169e-05, "loss": 2.7674, "step": 19350 }, { "epoch": 0.5896367664488, "grad_norm": 32.25, "learning_rate": 8.850637142634977e-05, "loss": 2.7487, "step": 19360 }, { "epoch": 0.5899413308942797, "grad_norm": 31.125, "learning_rate": 8.84201311711834e-05, "loss": 2.7822, "step": 19370 }, { "epoch": 0.5902458953397595, "grad_norm": 31.625, "learning_rate": 8.833391193411309e-05, "loss": 2.7524, "step": 19380 }, { "epoch": 0.5905504597852392, "grad_norm": 31.375, "learning_rate": 8.82477137991136e-05, "loss": 2.7688, "step": 19390 }, { "epoch": 0.590855024230719, "grad_norm": 31.5, "learning_rate": 8.816153685013908e-05, "loss": 2.7375, "step": 19400 }, { "epoch": 0.5911595886761988, "grad_norm": 32.0, "learning_rate": 8.807538117112313e-05, "loss": 2.7663, "step": 19410 }, { "epoch": 0.5914641531216784, "grad_norm": 32.75, "learning_rate": 8.79892468459786e-05, "loss": 2.7789, "step": 19420 }, { "epoch": 0.5917687175671582, "grad_norm": 30.75, "learning_rate": 8.790313395859753e-05, "loss": 2.7232, "step": 19430 }, { "epoch": 0.592073282012638, "grad_norm": 32.25, "learning_rate": 8.781704259285111e-05, "loss": 2.7488, "step": 19440 }, { "epoch": 0.5923778464581178, "grad_norm": 33.75, "learning_rate": 8.773097283258953e-05, "loss": 2.7477, "step": 19450 }, { "epoch": 0.5926824109035975, "grad_norm": 32.0, "learning_rate": 8.764492476164201e-05, "loss": 2.7292, "step": 19460 }, { "epoch": 0.5929869753490773, "grad_norm": 33.0, "learning_rate": 8.755889846381655e-05, "loss": 2.7705, "step": 19470 }, { "epoch": 0.593291539794557, "grad_norm": 30.625, "learning_rate": 8.747289402290001e-05, "loss": 2.783, "step": 19480 }, { "epoch": 0.5935961042400367, "grad_norm": 30.875, "learning_rate": 8.738691152265794e-05, "loss": 2.7342, "step": 19490 }, { "epoch": 0.5939006686855165, "grad_norm": 30.875, "learning_rate": 8.730095104683449e-05, "loss": 2.748, "step": 19500 }, { "epoch": 0.5942052331309963, "grad_norm": 31.625, "learning_rate": 8.721501267915244e-05, "loss": 2.7574, "step": 19510 }, { "epoch": 0.594509797576476, "grad_norm": 32.5, "learning_rate": 8.712909650331294e-05, "loss": 2.7631, "step": 19520 }, { "epoch": 0.5948143620219558, "grad_norm": 33.0, "learning_rate": 8.70432026029956e-05, "loss": 2.7444, "step": 19530 }, { "epoch": 0.5951189264674355, "grad_norm": 31.625, "learning_rate": 8.695733106185831e-05, "loss": 2.7398, "step": 19540 }, { "epoch": 0.5954234909129152, "grad_norm": 31.875, "learning_rate": 8.687148196353716e-05, "loss": 2.7846, "step": 19550 }, { "epoch": 0.595728055358395, "grad_norm": 31.625, "learning_rate": 8.678565539164645e-05, "loss": 2.7723, "step": 19560 }, { "epoch": 0.5960326198038748, "grad_norm": 32.25, "learning_rate": 8.669985142977844e-05, "loss": 2.7442, "step": 19570 }, { "epoch": 0.5963371842493546, "grad_norm": 32.5, "learning_rate": 8.661407016150342e-05, "loss": 2.7575, "step": 19580 }, { "epoch": 0.5966417486948343, "grad_norm": 32.5, "learning_rate": 8.652831167036962e-05, "loss": 2.7539, "step": 19590 }, { "epoch": 0.596946313140314, "grad_norm": 31.625, "learning_rate": 8.644257603990302e-05, "loss": 2.7624, "step": 19600 }, { "epoch": 0.5972508775857938, "grad_norm": 32.5, "learning_rate": 8.635686335360735e-05, "loss": 2.7487, "step": 19610 }, { "epoch": 0.5975554420312735, "grad_norm": 30.75, "learning_rate": 8.6271173694964e-05, "loss": 2.7298, "step": 19620 }, { "epoch": 0.5978600064767533, "grad_norm": 32.5, "learning_rate": 8.618550714743196e-05, "loss": 2.7415, "step": 19630 }, { "epoch": 0.5981645709222331, "grad_norm": 32.0, "learning_rate": 8.609986379444768e-05, "loss": 2.7486, "step": 19640 }, { "epoch": 0.5984691353677127, "grad_norm": 35.5, "learning_rate": 8.601424371942499e-05, "loss": 2.7611, "step": 19650 }, { "epoch": 0.5987736998131925, "grad_norm": 33.0, "learning_rate": 8.592864700575509e-05, "loss": 2.7917, "step": 19660 }, { "epoch": 0.5990782642586723, "grad_norm": 32.75, "learning_rate": 8.584307373680644e-05, "loss": 2.7285, "step": 19670 }, { "epoch": 0.599382828704152, "grad_norm": 31.75, "learning_rate": 8.575752399592465e-05, "loss": 2.7668, "step": 19680 }, { "epoch": 0.5996873931496318, "grad_norm": 32.0, "learning_rate": 8.567199786643239e-05, "loss": 2.7825, "step": 19690 }, { "epoch": 0.5999919575951116, "grad_norm": 32.25, "learning_rate": 8.55864954316293e-05, "loss": 2.7596, "step": 19700 }, { "epoch": 0.6002965220405913, "grad_norm": 31.0, "learning_rate": 8.550101677479208e-05, "loss": 2.7577, "step": 19710 }, { "epoch": 0.600601086486071, "grad_norm": 33.0, "learning_rate": 8.541556197917413e-05, "loss": 2.7329, "step": 19720 }, { "epoch": 0.6009056509315508, "grad_norm": 32.0, "learning_rate": 8.53301311280057e-05, "loss": 2.7308, "step": 19730 }, { "epoch": 0.6012102153770306, "grad_norm": 30.5, "learning_rate": 8.524472430449366e-05, "loss": 2.7404, "step": 19740 }, { "epoch": 0.6015147798225103, "grad_norm": 32.0, "learning_rate": 8.515934159182151e-05, "loss": 2.7182, "step": 19750 }, { "epoch": 0.6018193442679901, "grad_norm": 33.5, "learning_rate": 8.507398307314922e-05, "loss": 2.7292, "step": 19760 }, { "epoch": 0.6021239087134698, "grad_norm": 32.25, "learning_rate": 8.498864883161331e-05, "loss": 2.7101, "step": 19770 }, { "epoch": 0.6024284731589495, "grad_norm": 32.5, "learning_rate": 8.490333895032652e-05, "loss": 2.7343, "step": 19780 }, { "epoch": 0.6027330376044293, "grad_norm": 31.75, "learning_rate": 8.481805351237793e-05, "loss": 2.7309, "step": 19790 }, { "epoch": 0.6030376020499091, "grad_norm": 31.75, "learning_rate": 8.473279260083283e-05, "loss": 2.7847, "step": 19800 }, { "epoch": 0.6033421664953889, "grad_norm": 32.75, "learning_rate": 8.464755629873256e-05, "loss": 2.761, "step": 19810 }, { "epoch": 0.6036467309408686, "grad_norm": 32.25, "learning_rate": 8.456234468909454e-05, "loss": 2.7516, "step": 19820 }, { "epoch": 0.6039512953863483, "grad_norm": 34.0, "learning_rate": 8.447715785491212e-05, "loss": 2.7637, "step": 19830 }, { "epoch": 0.6042558598318281, "grad_norm": 31.375, "learning_rate": 8.439199587915459e-05, "loss": 2.7393, "step": 19840 }, { "epoch": 0.6045604242773078, "grad_norm": 32.75, "learning_rate": 8.43068588447669e-05, "loss": 2.74, "step": 19850 }, { "epoch": 0.6048649887227876, "grad_norm": 32.75, "learning_rate": 8.422174683466977e-05, "loss": 2.7687, "step": 19860 }, { "epoch": 0.6051695531682674, "grad_norm": 31.25, "learning_rate": 8.413665993175961e-05, "loss": 2.7787, "step": 19870 }, { "epoch": 0.6054741176137471, "grad_norm": 32.75, "learning_rate": 8.405159821890827e-05, "loss": 2.7585, "step": 19880 }, { "epoch": 0.6057786820592268, "grad_norm": 33.0, "learning_rate": 8.396656177896314e-05, "loss": 2.7412, "step": 19890 }, { "epoch": 0.6060832465047066, "grad_norm": 31.0, "learning_rate": 8.388155069474694e-05, "loss": 2.7579, "step": 19900 }, { "epoch": 0.6063878109501863, "grad_norm": 32.25, "learning_rate": 8.379656504905775e-05, "loss": 2.7668, "step": 19910 }, { "epoch": 0.6066923753956661, "grad_norm": 32.25, "learning_rate": 8.371160492466885e-05, "loss": 2.7395, "step": 19920 }, { "epoch": 0.6069969398411459, "grad_norm": 31.25, "learning_rate": 8.362667040432865e-05, "loss": 2.7218, "step": 19930 }, { "epoch": 0.6073015042866255, "grad_norm": 31.5, "learning_rate": 8.354176157076063e-05, "loss": 2.7111, "step": 19940 }, { "epoch": 0.6076060687321053, "grad_norm": 30.25, "learning_rate": 8.345687850666324e-05, "loss": 2.746, "step": 19950 }, { "epoch": 0.6079106331775851, "grad_norm": 33.0, "learning_rate": 8.337202129470987e-05, "loss": 2.7379, "step": 19960 }, { "epoch": 0.6082151976230649, "grad_norm": 31.5, "learning_rate": 8.32871900175487e-05, "loss": 2.7464, "step": 19970 }, { "epoch": 0.6085197620685446, "grad_norm": 31.75, "learning_rate": 8.320238475780269e-05, "loss": 2.7441, "step": 19980 }, { "epoch": 0.6088243265140244, "grad_norm": 32.5, "learning_rate": 8.311760559806936e-05, "loss": 2.7635, "step": 19990 }, { "epoch": 0.6091288909595041, "grad_norm": 32.5, "learning_rate": 8.303285262092092e-05, "loss": 2.752, "step": 20000 }, { "epoch": 0.6094334554049838, "grad_norm": 32.5, "learning_rate": 8.294812590890402e-05, "loss": 2.7579, "step": 20010 }, { "epoch": 0.6097380198504636, "grad_norm": 32.0, "learning_rate": 8.286342554453976e-05, "loss": 2.7224, "step": 20020 }, { "epoch": 0.6100425842959434, "grad_norm": 32.25, "learning_rate": 8.277875161032355e-05, "loss": 2.7079, "step": 20030 }, { "epoch": 0.6103471487414232, "grad_norm": 31.75, "learning_rate": 8.269410418872508e-05, "loss": 2.7194, "step": 20040 }, { "epoch": 0.6106517131869029, "grad_norm": 32.5, "learning_rate": 8.260948336218816e-05, "loss": 2.7557, "step": 20050 }, { "epoch": 0.6109562776323826, "grad_norm": 31.375, "learning_rate": 8.252488921313085e-05, "loss": 2.7581, "step": 20060 }, { "epoch": 0.6112608420778624, "grad_norm": 31.125, "learning_rate": 8.244032182394503e-05, "loss": 2.7447, "step": 20070 }, { "epoch": 0.6115654065233421, "grad_norm": 30.875, "learning_rate": 8.235578127699661e-05, "loss": 2.7443, "step": 20080 }, { "epoch": 0.6118699709688219, "grad_norm": 32.75, "learning_rate": 8.22712676546254e-05, "loss": 2.7269, "step": 20090 }, { "epoch": 0.6121745354143017, "grad_norm": 31.5, "learning_rate": 8.218678103914493e-05, "loss": 2.7829, "step": 20100 }, { "epoch": 0.6124790998597814, "grad_norm": 33.5, "learning_rate": 8.210232151284245e-05, "loss": 2.7793, "step": 20110 }, { "epoch": 0.6127836643052611, "grad_norm": 30.75, "learning_rate": 8.201788915797877e-05, "loss": 2.7515, "step": 20120 }, { "epoch": 0.6130882287507409, "grad_norm": 31.875, "learning_rate": 8.19334840567883e-05, "loss": 2.7529, "step": 20130 }, { "epoch": 0.6133927931962206, "grad_norm": 32.25, "learning_rate": 8.184910629147888e-05, "loss": 2.7473, "step": 20140 }, { "epoch": 0.6136973576417004, "grad_norm": 32.75, "learning_rate": 8.176475594423174e-05, "loss": 2.7524, "step": 20150 }, { "epoch": 0.6140019220871802, "grad_norm": 32.5, "learning_rate": 8.168043309720146e-05, "loss": 2.7358, "step": 20160 }, { "epoch": 0.6143064865326598, "grad_norm": 31.5, "learning_rate": 8.159613783251565e-05, "loss": 2.7583, "step": 20170 }, { "epoch": 0.6146110509781396, "grad_norm": 32.0, "learning_rate": 8.15118702322753e-05, "loss": 2.7438, "step": 20180 }, { "epoch": 0.6149156154236194, "grad_norm": 32.25, "learning_rate": 8.142763037855422e-05, "loss": 2.7414, "step": 20190 }, { "epoch": 0.6152201798690992, "grad_norm": 32.0, "learning_rate": 8.134341835339938e-05, "loss": 2.7631, "step": 20200 }, { "epoch": 0.6155247443145789, "grad_norm": 31.0, "learning_rate": 8.125923423883055e-05, "loss": 2.7181, "step": 20210 }, { "epoch": 0.6158293087600587, "grad_norm": 31.625, "learning_rate": 8.117507811684036e-05, "loss": 2.7624, "step": 20220 }, { "epoch": 0.6161338732055384, "grad_norm": 32.0, "learning_rate": 8.109095006939414e-05, "loss": 2.7506, "step": 20230 }, { "epoch": 0.6164384376510181, "grad_norm": 31.75, "learning_rate": 8.100685017842991e-05, "loss": 2.7187, "step": 20240 }, { "epoch": 0.6167430020964979, "grad_norm": 29.75, "learning_rate": 8.092277852585821e-05, "loss": 2.7511, "step": 20250 }, { "epoch": 0.6170475665419777, "grad_norm": 33.5, "learning_rate": 8.08387351935622e-05, "loss": 2.7426, "step": 20260 }, { "epoch": 0.6173521309874574, "grad_norm": 34.0, "learning_rate": 8.07547202633973e-05, "loss": 2.7371, "step": 20270 }, { "epoch": 0.6176566954329372, "grad_norm": 31.25, "learning_rate": 8.067073381719136e-05, "loss": 2.7348, "step": 20280 }, { "epoch": 0.6179612598784169, "grad_norm": 32.75, "learning_rate": 8.058677593674448e-05, "loss": 2.7466, "step": 20290 }, { "epoch": 0.6182658243238967, "grad_norm": 32.5, "learning_rate": 8.050284670382893e-05, "loss": 2.7322, "step": 20300 }, { "epoch": 0.6185703887693764, "grad_norm": 31.625, "learning_rate": 8.041894620018911e-05, "loss": 2.7367, "step": 20310 }, { "epoch": 0.6188749532148562, "grad_norm": 31.25, "learning_rate": 8.03350745075413e-05, "loss": 2.7258, "step": 20320 }, { "epoch": 0.619179517660336, "grad_norm": 32.5, "learning_rate": 8.025123170757389e-05, "loss": 2.7562, "step": 20330 }, { "epoch": 0.6194840821058157, "grad_norm": 30.125, "learning_rate": 8.016741788194706e-05, "loss": 2.77, "step": 20340 }, { "epoch": 0.6197886465512954, "grad_norm": 31.5, "learning_rate": 8.008363311229276e-05, "loss": 2.7767, "step": 20350 }, { "epoch": 0.6200932109967752, "grad_norm": 32.5, "learning_rate": 7.999987748021471e-05, "loss": 2.7506, "step": 20360 }, { "epoch": 0.6203977754422549, "grad_norm": 32.0, "learning_rate": 7.991615106728809e-05, "loss": 2.753, "step": 20370 }, { "epoch": 0.6207023398877347, "grad_norm": 30.625, "learning_rate": 7.98324539550598e-05, "loss": 2.7131, "step": 20380 }, { "epoch": 0.6210069043332145, "grad_norm": 32.75, "learning_rate": 7.974878622504812e-05, "loss": 2.7547, "step": 20390 }, { "epoch": 0.6213114687786941, "grad_norm": 32.5, "learning_rate": 7.966514795874271e-05, "loss": 2.7576, "step": 20400 }, { "epoch": 0.6216160332241739, "grad_norm": 31.25, "learning_rate": 7.958153923760452e-05, "loss": 2.7341, "step": 20410 }, { "epoch": 0.6219205976696537, "grad_norm": 31.375, "learning_rate": 7.949796014306576e-05, "loss": 2.7451, "step": 20420 }, { "epoch": 0.6222251621151335, "grad_norm": 31.0, "learning_rate": 7.94144107565298e-05, "loss": 2.7238, "step": 20430 }, { "epoch": 0.6225297265606132, "grad_norm": 32.5, "learning_rate": 7.9330891159371e-05, "loss": 2.7433, "step": 20440 }, { "epoch": 0.622834291006093, "grad_norm": 32.5, "learning_rate": 7.924740143293481e-05, "loss": 2.7482, "step": 20450 }, { "epoch": 0.6231388554515727, "grad_norm": 31.75, "learning_rate": 7.916394165853748e-05, "loss": 2.7616, "step": 20460 }, { "epoch": 0.6234434198970524, "grad_norm": 31.125, "learning_rate": 7.908051191746615e-05, "loss": 2.7185, "step": 20470 }, { "epoch": 0.6237479843425322, "grad_norm": 32.75, "learning_rate": 7.899711229097872e-05, "loss": 2.73, "step": 20480 }, { "epoch": 0.624052548788012, "grad_norm": 32.0, "learning_rate": 7.891374286030373e-05, "loss": 2.7581, "step": 20490 }, { "epoch": 0.6243571132334917, "grad_norm": 31.0, "learning_rate": 7.883040370664028e-05, "loss": 2.7218, "step": 20500 }, { "epoch": 0.6246616776789715, "grad_norm": 33.0, "learning_rate": 7.874709491115803e-05, "loss": 2.7025, "step": 20510 }, { "epoch": 0.6249662421244512, "grad_norm": 32.75, "learning_rate": 7.866381655499711e-05, "loss": 2.7357, "step": 20520 }, { "epoch": 0.625270806569931, "grad_norm": 33.75, "learning_rate": 7.858056871926787e-05, "loss": 2.748, "step": 20530 }, { "epoch": 0.6255753710154107, "grad_norm": 32.25, "learning_rate": 7.849735148505108e-05, "loss": 2.7377, "step": 20540 }, { "epoch": 0.6258799354608905, "grad_norm": 31.375, "learning_rate": 7.841416493339764e-05, "loss": 2.7466, "step": 20550 }, { "epoch": 0.6261844999063703, "grad_norm": 33.5, "learning_rate": 7.833100914532852e-05, "loss": 2.7144, "step": 20560 }, { "epoch": 0.62648906435185, "grad_norm": 32.5, "learning_rate": 7.82478842018348e-05, "loss": 2.7462, "step": 20570 }, { "epoch": 0.6267936287973297, "grad_norm": 31.125, "learning_rate": 7.816479018387755e-05, "loss": 2.7737, "step": 20580 }, { "epoch": 0.6270981932428095, "grad_norm": 32.25, "learning_rate": 7.808172717238756e-05, "loss": 2.7585, "step": 20590 }, { "epoch": 0.6274027576882892, "grad_norm": 32.75, "learning_rate": 7.79986952482656e-05, "loss": 2.7515, "step": 20600 }, { "epoch": 0.627707322133769, "grad_norm": 31.25, "learning_rate": 7.791569449238201e-05, "loss": 2.7116, "step": 20610 }, { "epoch": 0.6280118865792488, "grad_norm": 32.5, "learning_rate": 7.783272498557693e-05, "loss": 2.7207, "step": 20620 }, { "epoch": 0.6283164510247284, "grad_norm": 31.25, "learning_rate": 7.774978680865992e-05, "loss": 2.7407, "step": 20630 }, { "epoch": 0.6286210154702082, "grad_norm": 31.25, "learning_rate": 7.766688004241015e-05, "loss": 2.7463, "step": 20640 }, { "epoch": 0.628925579915688, "grad_norm": 31.25, "learning_rate": 7.758400476757609e-05, "loss": 2.7617, "step": 20650 }, { "epoch": 0.6292301443611678, "grad_norm": 31.0, "learning_rate": 7.750116106487559e-05, "loss": 2.7337, "step": 20660 }, { "epoch": 0.6295347088066475, "grad_norm": 32.25, "learning_rate": 7.741834901499576e-05, "loss": 2.7574, "step": 20670 }, { "epoch": 0.6298392732521273, "grad_norm": 32.25, "learning_rate": 7.733556869859285e-05, "loss": 2.7745, "step": 20680 }, { "epoch": 0.630143837697607, "grad_norm": 33.0, "learning_rate": 7.725282019629223e-05, "loss": 2.7278, "step": 20690 }, { "epoch": 0.6304484021430867, "grad_norm": 31.5, "learning_rate": 7.717010358868825e-05, "loss": 2.7208, "step": 20700 }, { "epoch": 0.6307529665885665, "grad_norm": 32.5, "learning_rate": 7.708741895634426e-05, "loss": 2.738, "step": 20710 }, { "epoch": 0.6310575310340463, "grad_norm": 32.0, "learning_rate": 7.700476637979237e-05, "loss": 2.6947, "step": 20720 }, { "epoch": 0.631362095479526, "grad_norm": 30.5, "learning_rate": 7.692214593953358e-05, "loss": 2.7074, "step": 20730 }, { "epoch": 0.6316666599250058, "grad_norm": 32.75, "learning_rate": 7.683955771603748e-05, "loss": 2.7264, "step": 20740 }, { "epoch": 0.6319712243704855, "grad_norm": 32.25, "learning_rate": 7.675700178974237e-05, "loss": 2.7294, "step": 20750 }, { "epoch": 0.6322757888159652, "grad_norm": 32.0, "learning_rate": 7.667447824105503e-05, "loss": 2.7609, "step": 20760 }, { "epoch": 0.632580353261445, "grad_norm": 33.0, "learning_rate": 7.659198715035076e-05, "loss": 2.7508, "step": 20770 }, { "epoch": 0.6328849177069248, "grad_norm": 32.5, "learning_rate": 7.650952859797322e-05, "loss": 2.7394, "step": 20780 }, { "epoch": 0.6331894821524046, "grad_norm": 34.5, "learning_rate": 7.642710266423436e-05, "loss": 2.7859, "step": 20790 }, { "epoch": 0.6334940465978843, "grad_norm": 31.625, "learning_rate": 7.634470942941441e-05, "loss": 2.7596, "step": 20800 }, { "epoch": 0.633798611043364, "grad_norm": 33.0, "learning_rate": 7.626234897376168e-05, "loss": 2.7468, "step": 20810 }, { "epoch": 0.6341031754888438, "grad_norm": 30.75, "learning_rate": 7.618002137749265e-05, "loss": 2.7385, "step": 20820 }, { "epoch": 0.6344077399343235, "grad_norm": 30.625, "learning_rate": 7.60977267207917e-05, "loss": 2.7378, "step": 20830 }, { "epoch": 0.6347123043798033, "grad_norm": 31.375, "learning_rate": 7.601546508381117e-05, "loss": 2.7481, "step": 20840 }, { "epoch": 0.6350168688252831, "grad_norm": 32.75, "learning_rate": 7.593323654667125e-05, "loss": 2.722, "step": 20850 }, { "epoch": 0.6353214332707627, "grad_norm": 32.0, "learning_rate": 7.585104118945991e-05, "loss": 2.7504, "step": 20860 }, { "epoch": 0.6356259977162425, "grad_norm": 33.25, "learning_rate": 7.576887909223278e-05, "loss": 2.7844, "step": 20870 }, { "epoch": 0.6359305621617223, "grad_norm": 32.0, "learning_rate": 7.568675033501303e-05, "loss": 2.76, "step": 20880 }, { "epoch": 0.636235126607202, "grad_norm": 33.0, "learning_rate": 7.560465499779145e-05, "loss": 2.7388, "step": 20890 }, { "epoch": 0.6365396910526818, "grad_norm": 32.0, "learning_rate": 7.55225931605263e-05, "loss": 2.718, "step": 20900 }, { "epoch": 0.6368442554981616, "grad_norm": 30.75, "learning_rate": 7.544056490314309e-05, "loss": 2.7357, "step": 20910 }, { "epoch": 0.6371488199436413, "grad_norm": 33.0, "learning_rate": 7.535857030553472e-05, "loss": 2.7502, "step": 20920 }, { "epoch": 0.637453384389121, "grad_norm": 31.625, "learning_rate": 7.52766094475613e-05, "loss": 2.7589, "step": 20930 }, { "epoch": 0.6377579488346008, "grad_norm": 33.75, "learning_rate": 7.519468240905002e-05, "loss": 2.7353, "step": 20940 }, { "epoch": 0.6380625132800806, "grad_norm": 32.5, "learning_rate": 7.511278926979524e-05, "loss": 2.7389, "step": 20950 }, { "epoch": 0.6383670777255603, "grad_norm": 33.25, "learning_rate": 7.503093010955817e-05, "loss": 2.7594, "step": 20960 }, { "epoch": 0.6386716421710401, "grad_norm": 33.0, "learning_rate": 7.494910500806704e-05, "loss": 2.7047, "step": 20970 }, { "epoch": 0.6389762066165198, "grad_norm": 33.25, "learning_rate": 7.486731404501684e-05, "loss": 2.7534, "step": 20980 }, { "epoch": 0.6392807710619995, "grad_norm": 32.25, "learning_rate": 7.47855573000693e-05, "loss": 2.7282, "step": 20990 }, { "epoch": 0.6395853355074793, "grad_norm": 32.5, "learning_rate": 7.470383485285288e-05, "loss": 2.7509, "step": 21000 }, { "epoch": 0.6398898999529591, "grad_norm": 33.0, "learning_rate": 7.462214678296262e-05, "loss": 2.756, "step": 21010 }, { "epoch": 0.6401944643984389, "grad_norm": 32.0, "learning_rate": 7.454049316996006e-05, "loss": 2.721, "step": 21020 }, { "epoch": 0.6404990288439186, "grad_norm": 32.5, "learning_rate": 7.445887409337316e-05, "loss": 2.776, "step": 21030 }, { "epoch": 0.6408035932893983, "grad_norm": 31.375, "learning_rate": 7.437728963269628e-05, "loss": 2.7499, "step": 21040 }, { "epoch": 0.6411081577348781, "grad_norm": 31.5, "learning_rate": 7.429573986739004e-05, "loss": 2.7508, "step": 21050 }, { "epoch": 0.6414127221803578, "grad_norm": 31.25, "learning_rate": 7.421422487688132e-05, "loss": 2.75, "step": 21060 }, { "epoch": 0.6417172866258376, "grad_norm": 33.0, "learning_rate": 7.413274474056307e-05, "loss": 2.7404, "step": 21070 }, { "epoch": 0.6420218510713174, "grad_norm": 33.0, "learning_rate": 7.405129953779428e-05, "loss": 2.7502, "step": 21080 }, { "epoch": 0.642326415516797, "grad_norm": 31.875, "learning_rate": 7.39698893479e-05, "loss": 2.7569, "step": 21090 }, { "epoch": 0.6426309799622768, "grad_norm": 32.75, "learning_rate": 7.388851425017111e-05, "loss": 2.7482, "step": 21100 }, { "epoch": 0.6429355444077566, "grad_norm": 30.75, "learning_rate": 7.380717432386432e-05, "loss": 2.7076, "step": 21110 }, { "epoch": 0.6432401088532363, "grad_norm": 32.5, "learning_rate": 7.372586964820211e-05, "loss": 2.7542, "step": 21120 }, { "epoch": 0.6435446732987161, "grad_norm": 32.25, "learning_rate": 7.364460030237262e-05, "loss": 2.7162, "step": 21130 }, { "epoch": 0.6438492377441959, "grad_norm": 31.125, "learning_rate": 7.356336636552952e-05, "loss": 2.743, "step": 21140 }, { "epoch": 0.6441538021896756, "grad_norm": 36.75, "learning_rate": 7.34821679167921e-05, "loss": 2.7393, "step": 21150 }, { "epoch": 0.6444583666351553, "grad_norm": 31.875, "learning_rate": 7.340100503524503e-05, "loss": 2.7347, "step": 21160 }, { "epoch": 0.6447629310806351, "grad_norm": 31.75, "learning_rate": 7.33198777999383e-05, "loss": 2.7665, "step": 21170 }, { "epoch": 0.6450674955261149, "grad_norm": 31.375, "learning_rate": 7.323878628988722e-05, "loss": 2.767, "step": 21180 }, { "epoch": 0.6453720599715946, "grad_norm": 31.375, "learning_rate": 7.315773058407232e-05, "loss": 2.7082, "step": 21190 }, { "epoch": 0.6456766244170744, "grad_norm": 32.25, "learning_rate": 7.307671076143928e-05, "loss": 2.7564, "step": 21200 }, { "epoch": 0.6459811888625541, "grad_norm": 31.25, "learning_rate": 7.299572690089871e-05, "loss": 2.7195, "step": 21210 }, { "epoch": 0.6462857533080338, "grad_norm": 35.25, "learning_rate": 7.291477908132636e-05, "loss": 2.7336, "step": 21220 }, { "epoch": 0.6465903177535136, "grad_norm": 32.5, "learning_rate": 7.283386738156273e-05, "loss": 2.7644, "step": 21230 }, { "epoch": 0.6468948821989934, "grad_norm": 32.75, "learning_rate": 7.275299188041325e-05, "loss": 2.7285, "step": 21240 }, { "epoch": 0.6471994466444732, "grad_norm": 33.25, "learning_rate": 7.267215265664804e-05, "loss": 2.7596, "step": 21250 }, { "epoch": 0.6475040110899529, "grad_norm": 31.5, "learning_rate": 7.259134978900188e-05, "loss": 2.7073, "step": 21260 }, { "epoch": 0.6478085755354326, "grad_norm": 31.375, "learning_rate": 7.251058335617418e-05, "loss": 2.7375, "step": 21270 }, { "epoch": 0.6481131399809124, "grad_norm": 31.875, "learning_rate": 7.24298534368288e-05, "loss": 2.7343, "step": 21280 }, { "epoch": 0.6484177044263921, "grad_norm": 32.0, "learning_rate": 7.234916010959414e-05, "loss": 2.71, "step": 21290 }, { "epoch": 0.6487222688718719, "grad_norm": 31.75, "learning_rate": 7.22685034530628e-05, "loss": 2.755, "step": 21300 }, { "epoch": 0.6490268333173517, "grad_norm": 31.125, "learning_rate": 7.218788354579187e-05, "loss": 2.7183, "step": 21310 }, { "epoch": 0.6493313977628313, "grad_norm": 30.875, "learning_rate": 7.210730046630249e-05, "loss": 2.7516, "step": 21320 }, { "epoch": 0.6496359622083111, "grad_norm": 31.625, "learning_rate": 7.202675429307995e-05, "loss": 2.7161, "step": 21330 }, { "epoch": 0.6499405266537909, "grad_norm": 30.75, "learning_rate": 7.194624510457369e-05, "loss": 2.7589, "step": 21340 }, { "epoch": 0.6502450910992706, "grad_norm": 32.0, "learning_rate": 7.186577297919698e-05, "loss": 2.6991, "step": 21350 }, { "epoch": 0.6505496555447504, "grad_norm": 32.25, "learning_rate": 7.178533799532717e-05, "loss": 2.7489, "step": 21360 }, { "epoch": 0.6508542199902302, "grad_norm": 31.875, "learning_rate": 7.170494023130528e-05, "loss": 2.7118, "step": 21370 }, { "epoch": 0.6511587844357098, "grad_norm": 33.5, "learning_rate": 7.162457976543616e-05, "loss": 2.7318, "step": 21380 }, { "epoch": 0.6514633488811896, "grad_norm": 31.875, "learning_rate": 7.154425667598828e-05, "loss": 2.7414, "step": 21390 }, { "epoch": 0.6517679133266694, "grad_norm": 31.875, "learning_rate": 7.146397104119373e-05, "loss": 2.7344, "step": 21400 }, { "epoch": 0.6520724777721492, "grad_norm": 31.25, "learning_rate": 7.13837229392482e-05, "loss": 2.7409, "step": 21410 }, { "epoch": 0.6523770422176289, "grad_norm": 30.875, "learning_rate": 7.130351244831068e-05, "loss": 2.7482, "step": 21420 }, { "epoch": 0.6526816066631087, "grad_norm": 31.125, "learning_rate": 7.122333964650362e-05, "loss": 2.7417, "step": 21430 }, { "epoch": 0.6529861711085884, "grad_norm": 32.0, "learning_rate": 7.114320461191277e-05, "loss": 2.7489, "step": 21440 }, { "epoch": 0.6532907355540681, "grad_norm": 31.5, "learning_rate": 7.106310742258701e-05, "loss": 2.7091, "step": 21450 }, { "epoch": 0.6535952999995479, "grad_norm": 32.75, "learning_rate": 7.098304815653851e-05, "loss": 2.7249, "step": 21460 }, { "epoch": 0.6538998644450277, "grad_norm": 31.25, "learning_rate": 7.090302689174235e-05, "loss": 2.7237, "step": 21470 }, { "epoch": 0.6542044288905075, "grad_norm": 32.5, "learning_rate": 7.082304370613666e-05, "loss": 2.7246, "step": 21480 }, { "epoch": 0.6545089933359872, "grad_norm": 31.375, "learning_rate": 7.07430986776225e-05, "loss": 2.7507, "step": 21490 }, { "epoch": 0.6548135577814669, "grad_norm": 31.125, "learning_rate": 7.066319188406369e-05, "loss": 2.7532, "step": 21500 }, { "epoch": 0.6551181222269467, "grad_norm": 33.0, "learning_rate": 7.058332340328698e-05, "loss": 2.7002, "step": 21510 }, { "epoch": 0.6554226866724264, "grad_norm": 32.0, "learning_rate": 7.050349331308164e-05, "loss": 2.7647, "step": 21520 }, { "epoch": 0.6557272511179062, "grad_norm": 32.25, "learning_rate": 7.042370169119954e-05, "loss": 2.746, "step": 21530 }, { "epoch": 0.656031815563386, "grad_norm": 30.875, "learning_rate": 7.034394861535525e-05, "loss": 2.7593, "step": 21540 }, { "epoch": 0.6563363800088656, "grad_norm": 34.75, "learning_rate": 7.026423416322561e-05, "loss": 2.7454, "step": 21550 }, { "epoch": 0.6566409444543454, "grad_norm": 31.0, "learning_rate": 7.018455841244998e-05, "loss": 2.7443, "step": 21560 }, { "epoch": 0.6569455088998252, "grad_norm": 32.25, "learning_rate": 7.010492144062997e-05, "loss": 2.7201, "step": 21570 }, { "epoch": 0.6572500733453049, "grad_norm": 31.5, "learning_rate": 7.002532332532934e-05, "loss": 2.7402, "step": 21580 }, { "epoch": 0.6575546377907847, "grad_norm": 32.25, "learning_rate": 6.994576414407421e-05, "loss": 2.6966, "step": 21590 }, { "epoch": 0.6578592022362645, "grad_norm": 31.5, "learning_rate": 6.986624397435257e-05, "loss": 2.7309, "step": 21600 }, { "epoch": 0.6581637666817441, "grad_norm": 32.0, "learning_rate": 6.97867628936145e-05, "loss": 2.7579, "step": 21610 }, { "epoch": 0.6584683311272239, "grad_norm": 32.25, "learning_rate": 6.970732097927207e-05, "loss": 2.7191, "step": 21620 }, { "epoch": 0.6587728955727037, "grad_norm": 32.5, "learning_rate": 6.962791830869906e-05, "loss": 2.7343, "step": 21630 }, { "epoch": 0.6590774600181835, "grad_norm": 30.875, "learning_rate": 6.954855495923118e-05, "loss": 2.7262, "step": 21640 }, { "epoch": 0.6593820244636632, "grad_norm": 36.25, "learning_rate": 6.946923100816576e-05, "loss": 2.7449, "step": 21650 }, { "epoch": 0.659686588909143, "grad_norm": 32.0, "learning_rate": 6.938994653276171e-05, "loss": 2.727, "step": 21660 }, { "epoch": 0.6599911533546227, "grad_norm": 31.75, "learning_rate": 6.931070161023965e-05, "loss": 2.7413, "step": 21670 }, { "epoch": 0.6602957178001024, "grad_norm": 32.5, "learning_rate": 6.923149631778146e-05, "loss": 2.7453, "step": 21680 }, { "epoch": 0.6606002822455822, "grad_norm": 32.75, "learning_rate": 6.915233073253064e-05, "loss": 2.7439, "step": 21690 }, { "epoch": 0.660904846691062, "grad_norm": 31.125, "learning_rate": 6.90732049315919e-05, "loss": 2.6791, "step": 21700 }, { "epoch": 0.6612094111365417, "grad_norm": 32.0, "learning_rate": 6.899411899203114e-05, "loss": 2.7366, "step": 21710 }, { "epoch": 0.6615139755820215, "grad_norm": 33.25, "learning_rate": 6.891507299087562e-05, "loss": 2.7486, "step": 21720 }, { "epoch": 0.6618185400275012, "grad_norm": 32.25, "learning_rate": 6.883606700511351e-05, "loss": 2.7183, "step": 21730 }, { "epoch": 0.662123104472981, "grad_norm": 32.5, "learning_rate": 6.875710111169415e-05, "loss": 2.738, "step": 21740 }, { "epoch": 0.6624276689184607, "grad_norm": 30.875, "learning_rate": 6.867817538752777e-05, "loss": 2.7153, "step": 21750 }, { "epoch": 0.6627322333639405, "grad_norm": 31.625, "learning_rate": 6.85992899094854e-05, "loss": 2.7252, "step": 21760 }, { "epoch": 0.6630367978094203, "grad_norm": 32.5, "learning_rate": 6.852044475439907e-05, "loss": 2.7227, "step": 21770 }, { "epoch": 0.6633413622548999, "grad_norm": 32.75, "learning_rate": 6.844163999906132e-05, "loss": 2.6942, "step": 21780 }, { "epoch": 0.6636459267003797, "grad_norm": 33.5, "learning_rate": 6.836287572022548e-05, "loss": 2.6939, "step": 21790 }, { "epoch": 0.6639504911458595, "grad_norm": 33.0, "learning_rate": 6.828415199460542e-05, "loss": 2.75, "step": 21800 }, { "epoch": 0.6642550555913392, "grad_norm": 31.875, "learning_rate": 6.820546889887547e-05, "loss": 2.7788, "step": 21810 }, { "epoch": 0.664559620036819, "grad_norm": 32.25, "learning_rate": 6.81268265096705e-05, "loss": 2.7256, "step": 21820 }, { "epoch": 0.6648641844822988, "grad_norm": 32.0, "learning_rate": 6.804822490358557e-05, "loss": 2.707, "step": 21830 }, { "epoch": 0.6651687489277784, "grad_norm": 32.75, "learning_rate": 6.796966415717618e-05, "loss": 2.742, "step": 21840 }, { "epoch": 0.6654733133732582, "grad_norm": 31.25, "learning_rate": 6.789114434695797e-05, "loss": 2.7806, "step": 21850 }, { "epoch": 0.665777877818738, "grad_norm": 31.75, "learning_rate": 6.781266554940661e-05, "loss": 2.7339, "step": 21860 }, { "epoch": 0.6660824422642178, "grad_norm": 33.0, "learning_rate": 6.773422784095803e-05, "loss": 2.729, "step": 21870 }, { "epoch": 0.6663870067096975, "grad_norm": 31.375, "learning_rate": 6.765583129800792e-05, "loss": 2.6882, "step": 21880 }, { "epoch": 0.6666915711551773, "grad_norm": 31.875, "learning_rate": 6.757747599691209e-05, "loss": 2.711, "step": 21890 }, { "epoch": 0.666996135600657, "grad_norm": 31.5, "learning_rate": 6.749916201398602e-05, "loss": 2.7239, "step": 21900 }, { "epoch": 0.6673007000461367, "grad_norm": 32.75, "learning_rate": 6.742088942550497e-05, "loss": 2.7245, "step": 21910 }, { "epoch": 0.6676052644916165, "grad_norm": 31.25, "learning_rate": 6.734265830770398e-05, "loss": 2.7492, "step": 21920 }, { "epoch": 0.6679098289370963, "grad_norm": 33.0, "learning_rate": 6.726446873677758e-05, "loss": 2.74, "step": 21930 }, { "epoch": 0.668214393382576, "grad_norm": 30.875, "learning_rate": 6.718632078887996e-05, "loss": 2.7281, "step": 21940 }, { "epoch": 0.6685189578280558, "grad_norm": 31.75, "learning_rate": 6.710821454012466e-05, "loss": 2.7134, "step": 21950 }, { "epoch": 0.6688235222735355, "grad_norm": 34.0, "learning_rate": 6.703015006658459e-05, "loss": 2.7213, "step": 21960 }, { "epoch": 0.6691280867190152, "grad_norm": 31.0, "learning_rate": 6.695212744429212e-05, "loss": 2.7133, "step": 21970 }, { "epoch": 0.669432651164495, "grad_norm": 31.875, "learning_rate": 6.687414674923875e-05, "loss": 2.7208, "step": 21980 }, { "epoch": 0.6697372156099748, "grad_norm": 32.0, "learning_rate": 6.679620805737508e-05, "loss": 2.7675, "step": 21990 }, { "epoch": 0.6700417800554546, "grad_norm": 32.0, "learning_rate": 6.6718311444611e-05, "loss": 2.7658, "step": 22000 }, { "epoch": 0.6703463445009342, "grad_norm": 32.5, "learning_rate": 6.664045698681518e-05, "loss": 2.7384, "step": 22010 }, { "epoch": 0.670650908946414, "grad_norm": 32.5, "learning_rate": 6.656264475981546e-05, "loss": 2.7136, "step": 22020 }, { "epoch": 0.6709554733918938, "grad_norm": 32.5, "learning_rate": 6.64848748393984e-05, "loss": 2.757, "step": 22030 }, { "epoch": 0.6712600378373735, "grad_norm": 32.75, "learning_rate": 6.640714730130935e-05, "loss": 2.7211, "step": 22040 }, { "epoch": 0.6715646022828533, "grad_norm": 32.25, "learning_rate": 6.632946222125246e-05, "loss": 2.7078, "step": 22050 }, { "epoch": 0.6718691667283331, "grad_norm": 31.125, "learning_rate": 6.625181967489048e-05, "loss": 2.7453, "step": 22060 }, { "epoch": 0.6721737311738127, "grad_norm": 31.5, "learning_rate": 6.617421973784479e-05, "loss": 2.7418, "step": 22070 }, { "epoch": 0.6724782956192925, "grad_norm": 31.5, "learning_rate": 6.609666248569517e-05, "loss": 2.7184, "step": 22080 }, { "epoch": 0.6727828600647723, "grad_norm": 31.875, "learning_rate": 6.601914799397988e-05, "loss": 2.7557, "step": 22090 }, { "epoch": 0.673087424510252, "grad_norm": 32.5, "learning_rate": 6.594167633819557e-05, "loss": 2.7364, "step": 22100 }, { "epoch": 0.6733919889557318, "grad_norm": 32.75, "learning_rate": 6.58642475937971e-05, "loss": 2.7097, "step": 22110 }, { "epoch": 0.6736965534012116, "grad_norm": 30.0, "learning_rate": 6.578686183619762e-05, "loss": 2.727, "step": 22120 }, { "epoch": 0.6740011178466913, "grad_norm": 32.75, "learning_rate": 6.570951914076833e-05, "loss": 2.784, "step": 22130 }, { "epoch": 0.674305682292171, "grad_norm": 32.75, "learning_rate": 6.563221958283848e-05, "loss": 2.7157, "step": 22140 }, { "epoch": 0.6746102467376508, "grad_norm": 31.875, "learning_rate": 6.555496323769542e-05, "loss": 2.7575, "step": 22150 }, { "epoch": 0.6749148111831306, "grad_norm": 33.0, "learning_rate": 6.54777501805843e-05, "loss": 2.7245, "step": 22160 }, { "epoch": 0.6752193756286103, "grad_norm": 32.0, "learning_rate": 6.540058048670816e-05, "loss": 2.7268, "step": 22170 }, { "epoch": 0.6755239400740901, "grad_norm": 32.25, "learning_rate": 6.532345423122778e-05, "loss": 2.7265, "step": 22180 }, { "epoch": 0.6758285045195698, "grad_norm": 32.5, "learning_rate": 6.52463714892616e-05, "loss": 2.7525, "step": 22190 }, { "epoch": 0.6761330689650495, "grad_norm": 34.0, "learning_rate": 6.516933233588582e-05, "loss": 2.7592, "step": 22200 }, { "epoch": 0.6764376334105293, "grad_norm": 33.25, "learning_rate": 6.5092336846134e-05, "loss": 2.7682, "step": 22210 }, { "epoch": 0.6767421978560091, "grad_norm": 31.25, "learning_rate": 6.501538509499731e-05, "loss": 2.7134, "step": 22220 }, { "epoch": 0.6770467623014889, "grad_norm": 31.375, "learning_rate": 6.49384771574243e-05, "loss": 2.746, "step": 22230 }, { "epoch": 0.6773513267469685, "grad_norm": 30.25, "learning_rate": 6.486161310832073e-05, "loss": 2.7728, "step": 22240 }, { "epoch": 0.6776558911924483, "grad_norm": 32.5, "learning_rate": 6.47847930225498e-05, "loss": 2.7145, "step": 22250 }, { "epoch": 0.6779604556379281, "grad_norm": 31.75, "learning_rate": 6.470801697493173e-05, "loss": 2.7478, "step": 22260 }, { "epoch": 0.6782650200834078, "grad_norm": 33.75, "learning_rate": 6.463128504024396e-05, "loss": 2.7093, "step": 22270 }, { "epoch": 0.6785695845288876, "grad_norm": 31.75, "learning_rate": 6.455459729322091e-05, "loss": 2.7503, "step": 22280 }, { "epoch": 0.6788741489743674, "grad_norm": 30.5, "learning_rate": 6.447795380855392e-05, "loss": 2.717, "step": 22290 }, { "epoch": 0.679178713419847, "grad_norm": 32.5, "learning_rate": 6.440135466089134e-05, "loss": 2.7362, "step": 22300 }, { "epoch": 0.6794832778653268, "grad_norm": 32.75, "learning_rate": 6.432479992483823e-05, "loss": 2.7284, "step": 22310 }, { "epoch": 0.6797878423108066, "grad_norm": 31.25, "learning_rate": 6.424828967495642e-05, "loss": 2.7485, "step": 22320 }, { "epoch": 0.6800924067562863, "grad_norm": 31.0, "learning_rate": 6.417182398576447e-05, "loss": 2.7388, "step": 22330 }, { "epoch": 0.6803969712017661, "grad_norm": 33.25, "learning_rate": 6.409540293173746e-05, "loss": 2.7462, "step": 22340 }, { "epoch": 0.6807015356472459, "grad_norm": 31.625, "learning_rate": 6.401902658730705e-05, "loss": 2.7072, "step": 22350 }, { "epoch": 0.6810061000927256, "grad_norm": 32.25, "learning_rate": 6.394269502686136e-05, "loss": 2.7606, "step": 22360 }, { "epoch": 0.6813106645382053, "grad_norm": 32.75, "learning_rate": 6.386640832474477e-05, "loss": 2.7668, "step": 22370 }, { "epoch": 0.6816152289836851, "grad_norm": 33.0, "learning_rate": 6.379016655525818e-05, "loss": 2.7367, "step": 22380 }, { "epoch": 0.6819197934291649, "grad_norm": 34.0, "learning_rate": 6.371396979265854e-05, "loss": 2.7859, "step": 22390 }, { "epoch": 0.6822243578746446, "grad_norm": 31.5, "learning_rate": 6.363781811115907e-05, "loss": 2.714, "step": 22400 }, { "epoch": 0.6825289223201244, "grad_norm": 31.875, "learning_rate": 6.356171158492908e-05, "loss": 2.7832, "step": 22410 }, { "epoch": 0.6828334867656041, "grad_norm": 32.0, "learning_rate": 6.348565028809377e-05, "loss": 2.7306, "step": 22420 }, { "epoch": 0.6831380512110838, "grad_norm": 31.625, "learning_rate": 6.340963429473451e-05, "loss": 2.7304, "step": 22430 }, { "epoch": 0.6834426156565636, "grad_norm": 34.75, "learning_rate": 6.333366367888835e-05, "loss": 2.7578, "step": 22440 }, { "epoch": 0.6837471801020434, "grad_norm": 32.5, "learning_rate": 6.325773851454824e-05, "loss": 2.7698, "step": 22450 }, { "epoch": 0.6840517445475232, "grad_norm": 31.75, "learning_rate": 6.318185887566285e-05, "loss": 2.7334, "step": 22460 }, { "epoch": 0.6843563089930028, "grad_norm": 32.5, "learning_rate": 6.310602483613649e-05, "loss": 2.7367, "step": 22470 }, { "epoch": 0.6846608734384826, "grad_norm": 33.0, "learning_rate": 6.303023646982907e-05, "loss": 2.7528, "step": 22480 }, { "epoch": 0.6849654378839624, "grad_norm": 31.625, "learning_rate": 6.295449385055601e-05, "loss": 2.762, "step": 22490 }, { "epoch": 0.6852700023294421, "grad_norm": 31.625, "learning_rate": 6.287879705208817e-05, "loss": 2.7444, "step": 22500 }, { "epoch": 0.6855745667749219, "grad_norm": 32.25, "learning_rate": 6.280314614815181e-05, "loss": 2.7258, "step": 22510 }, { "epoch": 0.6858791312204017, "grad_norm": 33.0, "learning_rate": 6.272754121242843e-05, "loss": 2.7165, "step": 22520 }, { "epoch": 0.6861836956658813, "grad_norm": 31.5, "learning_rate": 6.265198231855487e-05, "loss": 2.7077, "step": 22530 }, { "epoch": 0.6864882601113611, "grad_norm": 31.875, "learning_rate": 6.2576469540123e-05, "loss": 2.74, "step": 22540 }, { "epoch": 0.6867928245568409, "grad_norm": 30.875, "learning_rate": 6.250100295067984e-05, "loss": 2.7537, "step": 22550 }, { "epoch": 0.6870973890023206, "grad_norm": 31.875, "learning_rate": 6.242558262372746e-05, "loss": 2.7497, "step": 22560 }, { "epoch": 0.6874019534478004, "grad_norm": 32.25, "learning_rate": 6.235020863272271e-05, "loss": 2.748, "step": 22570 }, { "epoch": 0.6877065178932802, "grad_norm": 32.5, "learning_rate": 6.227488105107753e-05, "loss": 2.7208, "step": 22580 }, { "epoch": 0.6880110823387598, "grad_norm": 32.75, "learning_rate": 6.21995999521585e-05, "loss": 2.6934, "step": 22590 }, { "epoch": 0.6883156467842396, "grad_norm": 32.0, "learning_rate": 6.212436540928703e-05, "loss": 2.7308, "step": 22600 }, { "epoch": 0.6886202112297194, "grad_norm": 32.0, "learning_rate": 6.204917749573911e-05, "loss": 2.7362, "step": 22610 }, { "epoch": 0.6889247756751992, "grad_norm": 32.25, "learning_rate": 6.197403628474532e-05, "loss": 2.7248, "step": 22620 }, { "epoch": 0.6892293401206789, "grad_norm": 32.75, "learning_rate": 6.189894184949084e-05, "loss": 2.7454, "step": 22630 }, { "epoch": 0.6895339045661587, "grad_norm": 32.0, "learning_rate": 6.182389426311512e-05, "loss": 2.712, "step": 22640 }, { "epoch": 0.6898384690116384, "grad_norm": 32.0, "learning_rate": 6.17488935987122e-05, "loss": 2.7429, "step": 22650 }, { "epoch": 0.6901430334571181, "grad_norm": 33.75, "learning_rate": 6.167393992933025e-05, "loss": 2.7595, "step": 22660 }, { "epoch": 0.6904475979025979, "grad_norm": 32.5, "learning_rate": 6.159903332797172e-05, "loss": 2.7481, "step": 22670 }, { "epoch": 0.6907521623480777, "grad_norm": 30.875, "learning_rate": 6.152417386759329e-05, "loss": 2.6706, "step": 22680 }, { "epoch": 0.6910567267935575, "grad_norm": 31.0, "learning_rate": 6.144936162110558e-05, "loss": 2.7048, "step": 22690 }, { "epoch": 0.6913612912390371, "grad_norm": 32.0, "learning_rate": 6.13745966613733e-05, "loss": 2.7297, "step": 22700 }, { "epoch": 0.6916658556845169, "grad_norm": 34.25, "learning_rate": 6.129987906121519e-05, "loss": 2.7274, "step": 22710 }, { "epoch": 0.6919704201299967, "grad_norm": 31.625, "learning_rate": 6.122520889340371e-05, "loss": 2.7431, "step": 22720 }, { "epoch": 0.6922749845754764, "grad_norm": 31.25, "learning_rate": 6.115058623066523e-05, "loss": 2.7179, "step": 22730 }, { "epoch": 0.6925795490209562, "grad_norm": 31.5, "learning_rate": 6.107601114567982e-05, "loss": 2.7198, "step": 22740 }, { "epoch": 0.692884113466436, "grad_norm": 30.875, "learning_rate": 6.100148371108114e-05, "loss": 2.7596, "step": 22750 }, { "epoch": 0.6931886779119156, "grad_norm": 33.0, "learning_rate": 6.092700399945661e-05, "loss": 2.7304, "step": 22760 }, { "epoch": 0.6934932423573954, "grad_norm": 31.625, "learning_rate": 6.085257208334698e-05, "loss": 2.7298, "step": 22770 }, { "epoch": 0.6937978068028752, "grad_norm": 30.875, "learning_rate": 6.077818803524661e-05, "loss": 2.7636, "step": 22780 }, { "epoch": 0.6941023712483549, "grad_norm": 31.25, "learning_rate": 6.070385192760311e-05, "loss": 2.7334, "step": 22790 }, { "epoch": 0.6944069356938347, "grad_norm": 32.25, "learning_rate": 6.0629563832817436e-05, "loss": 2.7043, "step": 22800 }, { "epoch": 0.6947115001393145, "grad_norm": 32.5, "learning_rate": 6.0555323823243846e-05, "loss": 2.7293, "step": 22810 }, { "epoch": 0.6950160645847941, "grad_norm": 31.625, "learning_rate": 6.048113197118967e-05, "loss": 2.7504, "step": 22820 }, { "epoch": 0.6953206290302739, "grad_norm": 31.875, "learning_rate": 6.040698834891543e-05, "loss": 2.7576, "step": 22830 }, { "epoch": 0.6956251934757537, "grad_norm": 31.25, "learning_rate": 6.0332893028634607e-05, "loss": 2.754, "step": 22840 }, { "epoch": 0.6959297579212335, "grad_norm": 32.0, "learning_rate": 6.0258846082513585e-05, "loss": 2.7076, "step": 22850 }, { "epoch": 0.6962343223667132, "grad_norm": 31.625, "learning_rate": 6.01848475826718e-05, "loss": 2.7493, "step": 22860 }, { "epoch": 0.696538886812193, "grad_norm": 32.25, "learning_rate": 6.011089760118135e-05, "loss": 2.7411, "step": 22870 }, { "epoch": 0.6968434512576727, "grad_norm": 32.5, "learning_rate": 6.0036996210067095e-05, "loss": 2.733, "step": 22880 }, { "epoch": 0.6971480157031524, "grad_norm": 30.75, "learning_rate": 5.996314348130669e-05, "loss": 2.7109, "step": 22890 }, { "epoch": 0.6974525801486322, "grad_norm": 31.375, "learning_rate": 5.9889339486830244e-05, "loss": 2.7262, "step": 22900 }, { "epoch": 0.697757144594112, "grad_norm": 32.0, "learning_rate": 5.981558429852052e-05, "loss": 2.7029, "step": 22910 }, { "epoch": 0.6980617090395917, "grad_norm": 31.25, "learning_rate": 5.974187798821267e-05, "loss": 2.7156, "step": 22920 }, { "epoch": 0.6983662734850714, "grad_norm": 30.75, "learning_rate": 5.966822062769421e-05, "loss": 2.7232, "step": 22930 }, { "epoch": 0.6986708379305512, "grad_norm": 31.625, "learning_rate": 5.959461228870512e-05, "loss": 2.7275, "step": 22940 }, { "epoch": 0.698975402376031, "grad_norm": 32.25, "learning_rate": 5.95210530429375e-05, "loss": 2.7338, "step": 22950 }, { "epoch": 0.6992799668215107, "grad_norm": 32.5, "learning_rate": 5.94475429620357e-05, "loss": 2.6937, "step": 22960 }, { "epoch": 0.6995845312669905, "grad_norm": 31.375, "learning_rate": 5.937408211759612e-05, "loss": 2.6811, "step": 22970 }, { "epoch": 0.6998890957124703, "grad_norm": 32.0, "learning_rate": 5.9300670581167314e-05, "loss": 2.7366, "step": 22980 }, { "epoch": 0.7001936601579499, "grad_norm": 31.625, "learning_rate": 5.922730842424973e-05, "loss": 2.723, "step": 22990 }, { "epoch": 0.7004982246034297, "grad_norm": 33.0, "learning_rate": 5.9153995718295706e-05, "loss": 2.7096, "step": 23000 }, { "epoch": 0.7008027890489095, "grad_norm": 31.125, "learning_rate": 5.908073253470952e-05, "loss": 2.6906, "step": 23010 }, { "epoch": 0.7011073534943892, "grad_norm": 32.25, "learning_rate": 5.900751894484706e-05, "loss": 2.7184, "step": 23020 }, { "epoch": 0.701411917939869, "grad_norm": 36.5, "learning_rate": 5.8934355020016106e-05, "loss": 2.731, "step": 23030 }, { "epoch": 0.7017164823853488, "grad_norm": 32.5, "learning_rate": 5.8861240831475894e-05, "loss": 2.7192, "step": 23040 }, { "epoch": 0.7020210468308284, "grad_norm": 32.75, "learning_rate": 5.878817645043728e-05, "loss": 2.7325, "step": 23050 }, { "epoch": 0.7023256112763082, "grad_norm": 31.75, "learning_rate": 5.871516194806265e-05, "loss": 2.7733, "step": 23060 }, { "epoch": 0.702630175721788, "grad_norm": 32.25, "learning_rate": 5.864219739546577e-05, "loss": 2.6946, "step": 23070 }, { "epoch": 0.7029347401672678, "grad_norm": 31.375, "learning_rate": 5.8569282863711696e-05, "loss": 2.7422, "step": 23080 }, { "epoch": 0.7032393046127475, "grad_norm": 31.5, "learning_rate": 5.849641842381692e-05, "loss": 2.7258, "step": 23090 }, { "epoch": 0.7035438690582273, "grad_norm": 32.25, "learning_rate": 5.842360414674899e-05, "loss": 2.7281, "step": 23100 }, { "epoch": 0.703848433503707, "grad_norm": 30.875, "learning_rate": 5.835084010342672e-05, "loss": 2.7397, "step": 23110 }, { "epoch": 0.7041529979491867, "grad_norm": 32.0, "learning_rate": 5.8278126364719896e-05, "loss": 2.7322, "step": 23120 }, { "epoch": 0.7044575623946665, "grad_norm": 34.75, "learning_rate": 5.8205463001449344e-05, "loss": 2.7273, "step": 23130 }, { "epoch": 0.7047621268401463, "grad_norm": 32.25, "learning_rate": 5.8132850084386894e-05, "loss": 2.7337, "step": 23140 }, { "epoch": 0.705066691285626, "grad_norm": 31.75, "learning_rate": 5.806028768425511e-05, "loss": 2.7742, "step": 23150 }, { "epoch": 0.7053712557311057, "grad_norm": 31.875, "learning_rate": 5.79877758717275e-05, "loss": 2.7159, "step": 23160 }, { "epoch": 0.7056758201765855, "grad_norm": 32.0, "learning_rate": 5.7915314717428195e-05, "loss": 2.7155, "step": 23170 }, { "epoch": 0.7059803846220652, "grad_norm": 31.375, "learning_rate": 5.7842904291932e-05, "loss": 2.7034, "step": 23180 }, { "epoch": 0.706284949067545, "grad_norm": 32.75, "learning_rate": 5.777054466576438e-05, "loss": 2.7102, "step": 23190 }, { "epoch": 0.7065895135130248, "grad_norm": 32.75, "learning_rate": 5.769823590940127e-05, "loss": 2.7136, "step": 23200 }, { "epoch": 0.7068940779585046, "grad_norm": 31.0, "learning_rate": 5.762597809326901e-05, "loss": 2.7315, "step": 23210 }, { "epoch": 0.7071986424039842, "grad_norm": 31.625, "learning_rate": 5.755377128774445e-05, "loss": 2.7972, "step": 23220 }, { "epoch": 0.707503206849464, "grad_norm": 32.25, "learning_rate": 5.7481615563154635e-05, "loss": 2.7321, "step": 23230 }, { "epoch": 0.7078077712949438, "grad_norm": 32.0, "learning_rate": 5.740951098977696e-05, "loss": 2.7091, "step": 23240 }, { "epoch": 0.7081123357404235, "grad_norm": 33.75, "learning_rate": 5.733745763783893e-05, "loss": 2.7565, "step": 23250 }, { "epoch": 0.7084169001859033, "grad_norm": 33.0, "learning_rate": 5.726545557751817e-05, "loss": 2.6887, "step": 23260 }, { "epoch": 0.7087214646313831, "grad_norm": 31.375, "learning_rate": 5.71935048789424e-05, "loss": 2.7132, "step": 23270 }, { "epoch": 0.7090260290768627, "grad_norm": 33.0, "learning_rate": 5.712160561218925e-05, "loss": 2.6731, "step": 23280 }, { "epoch": 0.7093305935223425, "grad_norm": 34.25, "learning_rate": 5.70497578472863e-05, "loss": 2.7101, "step": 23290 }, { "epoch": 0.7096351579678223, "grad_norm": 32.0, "learning_rate": 5.697796165421097e-05, "loss": 2.7186, "step": 23300 }, { "epoch": 0.709939722413302, "grad_norm": 31.5, "learning_rate": 5.690621710289038e-05, "loss": 2.75, "step": 23310 }, { "epoch": 0.7102442868587818, "grad_norm": 32.25, "learning_rate": 5.683452426320148e-05, "loss": 2.7081, "step": 23320 }, { "epoch": 0.7105488513042616, "grad_norm": 33.0, "learning_rate": 5.6762883204970716e-05, "loss": 2.7167, "step": 23330 }, { "epoch": 0.7108534157497413, "grad_norm": 32.75, "learning_rate": 5.6691293997974216e-05, "loss": 2.7417, "step": 23340 }, { "epoch": 0.711157980195221, "grad_norm": 33.75, "learning_rate": 5.6619756711937564e-05, "loss": 2.7496, "step": 23350 }, { "epoch": 0.7114625446407008, "grad_norm": 31.875, "learning_rate": 5.6548271416535746e-05, "loss": 2.7338, "step": 23360 }, { "epoch": 0.7117671090861806, "grad_norm": 32.0, "learning_rate": 5.6476838181393134e-05, "loss": 2.7498, "step": 23370 }, { "epoch": 0.7120716735316603, "grad_norm": 30.875, "learning_rate": 5.640545707608335e-05, "loss": 2.7288, "step": 23380 }, { "epoch": 0.7123762379771401, "grad_norm": 31.25, "learning_rate": 5.6334128170129376e-05, "loss": 2.7337, "step": 23390 }, { "epoch": 0.7126808024226198, "grad_norm": 31.875, "learning_rate": 5.626285153300321e-05, "loss": 2.7635, "step": 23400 }, { "epoch": 0.7129853668680995, "grad_norm": 32.0, "learning_rate": 5.6191627234125985e-05, "loss": 2.7545, "step": 23410 }, { "epoch": 0.7132899313135793, "grad_norm": 32.75, "learning_rate": 5.6120455342867915e-05, "loss": 2.7155, "step": 23420 }, { "epoch": 0.7135944957590591, "grad_norm": 30.875, "learning_rate": 5.60493359285481e-05, "loss": 2.7271, "step": 23430 }, { "epoch": 0.7138990602045389, "grad_norm": 31.75, "learning_rate": 5.5978269060434575e-05, "loss": 2.7189, "step": 23440 }, { "epoch": 0.7142036246500185, "grad_norm": 31.75, "learning_rate": 5.590725480774417e-05, "loss": 2.7124, "step": 23450 }, { "epoch": 0.7145081890954983, "grad_norm": 31.375, "learning_rate": 5.583629323964241e-05, "loss": 2.7421, "step": 23460 }, { "epoch": 0.7148127535409781, "grad_norm": 35.0, "learning_rate": 5.576538442524369e-05, "loss": 2.7149, "step": 23470 }, { "epoch": 0.7151173179864578, "grad_norm": 32.5, "learning_rate": 5.5694528433610806e-05, "loss": 2.7406, "step": 23480 }, { "epoch": 0.7154218824319376, "grad_norm": 32.0, "learning_rate": 5.56237253337553e-05, "loss": 2.7689, "step": 23490 }, { "epoch": 0.7157264468774174, "grad_norm": 31.625, "learning_rate": 5.5552975194637024e-05, "loss": 2.7134, "step": 23500 }, { "epoch": 0.716031011322897, "grad_norm": 31.875, "learning_rate": 5.5482278085164374e-05, "loss": 2.7674, "step": 23510 }, { "epoch": 0.7163355757683768, "grad_norm": 30.75, "learning_rate": 5.541163407419405e-05, "loss": 2.7288, "step": 23520 }, { "epoch": 0.7166401402138566, "grad_norm": 32.75, "learning_rate": 5.5341043230530996e-05, "loss": 2.7437, "step": 23530 }, { "epoch": 0.7169447046593364, "grad_norm": 32.5, "learning_rate": 5.5270505622928506e-05, "loss": 2.7426, "step": 23540 }, { "epoch": 0.7172492691048161, "grad_norm": 34.75, "learning_rate": 5.520002132008788e-05, "loss": 2.7385, "step": 23550 }, { "epoch": 0.7175538335502959, "grad_norm": 32.25, "learning_rate": 5.512959039065853e-05, "loss": 2.7369, "step": 23560 }, { "epoch": 0.7178583979957756, "grad_norm": 31.75, "learning_rate": 5.505921290323794e-05, "loss": 2.7252, "step": 23570 }, { "epoch": 0.7181629624412553, "grad_norm": 32.5, "learning_rate": 5.498888892637153e-05, "loss": 2.7341, "step": 23580 }, { "epoch": 0.7184675268867351, "grad_norm": 32.5, "learning_rate": 5.491861852855253e-05, "loss": 2.7453, "step": 23590 }, { "epoch": 0.7187720913322149, "grad_norm": 31.125, "learning_rate": 5.48484017782221e-05, "loss": 2.7093, "step": 23600 }, { "epoch": 0.7190766557776946, "grad_norm": 32.5, "learning_rate": 5.477823874376905e-05, "loss": 2.7552, "step": 23610 }, { "epoch": 0.7193812202231744, "grad_norm": 32.0, "learning_rate": 5.4708129493529945e-05, "loss": 2.7677, "step": 23620 }, { "epoch": 0.7196857846686541, "grad_norm": 31.75, "learning_rate": 5.463807409578891e-05, "loss": 2.7523, "step": 23630 }, { "epoch": 0.7199903491141338, "grad_norm": 31.875, "learning_rate": 5.456807261877762e-05, "loss": 2.7426, "step": 23640 }, { "epoch": 0.7202949135596136, "grad_norm": 32.75, "learning_rate": 5.449812513067532e-05, "loss": 2.6975, "step": 23650 }, { "epoch": 0.7205994780050934, "grad_norm": 32.0, "learning_rate": 5.442823169960851e-05, "loss": 2.74, "step": 23660 }, { "epoch": 0.7209040424505732, "grad_norm": 31.25, "learning_rate": 5.435839239365126e-05, "loss": 2.7422, "step": 23670 }, { "epoch": 0.7212086068960528, "grad_norm": 33.25, "learning_rate": 5.428860728082472e-05, "loss": 2.6903, "step": 23680 }, { "epoch": 0.7215131713415326, "grad_norm": 32.25, "learning_rate": 5.421887642909732e-05, "loss": 2.7251, "step": 23690 }, { "epoch": 0.7218177357870124, "grad_norm": 32.25, "learning_rate": 5.4149199906384726e-05, "loss": 2.7721, "step": 23700 }, { "epoch": 0.7221223002324921, "grad_norm": 32.5, "learning_rate": 5.4079577780549575e-05, "loss": 2.7339, "step": 23710 }, { "epoch": 0.7224268646779719, "grad_norm": 31.25, "learning_rate": 5.4010010119401613e-05, "loss": 2.7066, "step": 23720 }, { "epoch": 0.7227314291234517, "grad_norm": 31.5, "learning_rate": 5.394049699069745e-05, "loss": 2.7219, "step": 23730 }, { "epoch": 0.7230359935689313, "grad_norm": 33.5, "learning_rate": 5.387103846214068e-05, "loss": 2.7121, "step": 23740 }, { "epoch": 0.7233405580144111, "grad_norm": 32.25, "learning_rate": 5.380163460138163e-05, "loss": 2.7492, "step": 23750 }, { "epoch": 0.7236451224598909, "grad_norm": 31.625, "learning_rate": 5.3732285476017405e-05, "loss": 2.7214, "step": 23760 }, { "epoch": 0.7239496869053706, "grad_norm": 32.5, "learning_rate": 5.366299115359188e-05, "loss": 2.7184, "step": 23770 }, { "epoch": 0.7242542513508504, "grad_norm": 32.5, "learning_rate": 5.359375170159544e-05, "loss": 2.7084, "step": 23780 }, { "epoch": 0.7245588157963302, "grad_norm": 32.25, "learning_rate": 5.352456718746508e-05, "loss": 2.688, "step": 23790 }, { "epoch": 0.7248633802418099, "grad_norm": 32.0, "learning_rate": 5.3455437678584316e-05, "loss": 2.7142, "step": 23800 }, { "epoch": 0.7251679446872896, "grad_norm": 31.875, "learning_rate": 5.3386363242283014e-05, "loss": 2.7308, "step": 23810 }, { "epoch": 0.7254725091327694, "grad_norm": 32.5, "learning_rate": 5.3317343945837494e-05, "loss": 2.763, "step": 23820 }, { "epoch": 0.7257770735782492, "grad_norm": 34.75, "learning_rate": 5.324837985647031e-05, "loss": 2.7455, "step": 23830 }, { "epoch": 0.7260816380237289, "grad_norm": 31.875, "learning_rate": 5.3179471041350226e-05, "loss": 2.7448, "step": 23840 }, { "epoch": 0.7263862024692087, "grad_norm": 31.875, "learning_rate": 5.311061756759226e-05, "loss": 2.7422, "step": 23850 }, { "epoch": 0.7266907669146884, "grad_norm": 32.25, "learning_rate": 5.30418195022574e-05, "loss": 2.7332, "step": 23860 }, { "epoch": 0.7269953313601681, "grad_norm": 35.0, "learning_rate": 5.2973076912352825e-05, "loss": 2.7008, "step": 23870 }, { "epoch": 0.7272998958056479, "grad_norm": 32.5, "learning_rate": 5.2904389864831574e-05, "loss": 2.7488, "step": 23880 }, { "epoch": 0.7276044602511277, "grad_norm": 32.5, "learning_rate": 5.2835758426592564e-05, "loss": 2.7231, "step": 23890 }, { "epoch": 0.7279090246966075, "grad_norm": 32.5, "learning_rate": 5.276718266448067e-05, "loss": 2.7408, "step": 23900 }, { "epoch": 0.7282135891420871, "grad_norm": 32.25, "learning_rate": 5.269866264528642e-05, "loss": 2.7384, "step": 23910 }, { "epoch": 0.7285181535875669, "grad_norm": 31.125, "learning_rate": 5.263019843574615e-05, "loss": 2.7341, "step": 23920 }, { "epoch": 0.7288227180330467, "grad_norm": 32.5, "learning_rate": 5.256179010254176e-05, "loss": 2.7174, "step": 23930 }, { "epoch": 0.7291272824785264, "grad_norm": 31.5, "learning_rate": 5.249343771230075e-05, "loss": 2.724, "step": 23940 }, { "epoch": 0.7294318469240062, "grad_norm": 33.25, "learning_rate": 5.2425141331596187e-05, "loss": 2.7317, "step": 23950 }, { "epoch": 0.729736411369486, "grad_norm": 31.75, "learning_rate": 5.2356901026946506e-05, "loss": 2.7148, "step": 23960 }, { "epoch": 0.7300409758149656, "grad_norm": 32.5, "learning_rate": 5.228871686481555e-05, "loss": 2.7166, "step": 23970 }, { "epoch": 0.7303455402604454, "grad_norm": 31.375, "learning_rate": 5.222058891161254e-05, "loss": 2.7422, "step": 23980 }, { "epoch": 0.7306501047059252, "grad_norm": 31.25, "learning_rate": 5.215251723369184e-05, "loss": 2.6994, "step": 23990 }, { "epoch": 0.730954669151405, "grad_norm": 31.75, "learning_rate": 5.2084501897353144e-05, "loss": 2.7095, "step": 24000 }, { "epoch": 0.7312592335968847, "grad_norm": 32.25, "learning_rate": 5.2016542968841165e-05, "loss": 2.7296, "step": 24010 }, { "epoch": 0.7315637980423645, "grad_norm": 32.0, "learning_rate": 5.194864051434567e-05, "loss": 2.7107, "step": 24020 }, { "epoch": 0.7318683624878441, "grad_norm": 31.375, "learning_rate": 5.1880794600001504e-05, "loss": 2.7264, "step": 24030 }, { "epoch": 0.7321729269333239, "grad_norm": 32.75, "learning_rate": 5.1813005291888374e-05, "loss": 2.6877, "step": 24040 }, { "epoch": 0.7324774913788037, "grad_norm": 32.75, "learning_rate": 5.1745272656030905e-05, "loss": 2.7157, "step": 24050 }, { "epoch": 0.7327820558242835, "grad_norm": 35.0, "learning_rate": 5.1677596758398495e-05, "loss": 2.7242, "step": 24060 }, { "epoch": 0.7330866202697632, "grad_norm": 32.25, "learning_rate": 5.1609977664905216e-05, "loss": 2.7248, "step": 24070 }, { "epoch": 0.733391184715243, "grad_norm": 31.625, "learning_rate": 5.154241544141001e-05, "loss": 2.7159, "step": 24080 }, { "epoch": 0.7336957491607227, "grad_norm": 31.0, "learning_rate": 5.147491015371617e-05, "loss": 2.7416, "step": 24090 }, { "epoch": 0.7340003136062024, "grad_norm": 30.625, "learning_rate": 5.140746186757178e-05, "loss": 2.7372, "step": 24100 }, { "epoch": 0.7343048780516822, "grad_norm": 31.625, "learning_rate": 5.134007064866923e-05, "loss": 2.7244, "step": 24110 }, { "epoch": 0.734609442497162, "grad_norm": 32.5, "learning_rate": 5.1272736562645396e-05, "loss": 2.7202, "step": 24120 }, { "epoch": 0.7349140069426418, "grad_norm": 34.0, "learning_rate": 5.120545967508154e-05, "loss": 2.7012, "step": 24130 }, { "epoch": 0.7352185713881214, "grad_norm": 32.5, "learning_rate": 5.1138240051503175e-05, "loss": 2.7049, "step": 24140 }, { "epoch": 0.7355231358336012, "grad_norm": 31.375, "learning_rate": 5.1071077757380016e-05, "loss": 2.7086, "step": 24150 }, { "epoch": 0.735827700279081, "grad_norm": 32.0, "learning_rate": 5.1003972858125994e-05, "loss": 2.7426, "step": 24160 }, { "epoch": 0.7361322647245607, "grad_norm": 33.25, "learning_rate": 5.0936925419099067e-05, "loss": 2.6735, "step": 24170 }, { "epoch": 0.7364368291700405, "grad_norm": 31.5, "learning_rate": 5.0869935505601354e-05, "loss": 2.7549, "step": 24180 }, { "epoch": 0.7367413936155203, "grad_norm": 32.5, "learning_rate": 5.080300318287881e-05, "loss": 2.7492, "step": 24190 }, { "epoch": 0.7370459580609999, "grad_norm": 31.75, "learning_rate": 5.073612851612139e-05, "loss": 2.7035, "step": 24200 }, { "epoch": 0.7373505225064797, "grad_norm": 32.0, "learning_rate": 5.066931157046286e-05, "loss": 2.7406, "step": 24210 }, { "epoch": 0.7376550869519595, "grad_norm": 31.75, "learning_rate": 5.060255241098074e-05, "loss": 2.7248, "step": 24220 }, { "epoch": 0.7379596513974392, "grad_norm": 32.75, "learning_rate": 5.053585110269634e-05, "loss": 2.731, "step": 24230 }, { "epoch": 0.738264215842919, "grad_norm": 33.25, "learning_rate": 5.046920771057455e-05, "loss": 2.7126, "step": 24240 }, { "epoch": 0.7385687802883988, "grad_norm": 32.75, "learning_rate": 5.040262229952392e-05, "loss": 2.7342, "step": 24250 }, { "epoch": 0.7388733447338784, "grad_norm": 32.25, "learning_rate": 5.033609493439646e-05, "loss": 2.7544, "step": 24260 }, { "epoch": 0.7391779091793582, "grad_norm": 32.25, "learning_rate": 5.0269625679987654e-05, "loss": 2.7047, "step": 24270 }, { "epoch": 0.739482473624838, "grad_norm": 34.75, "learning_rate": 5.020321460103647e-05, "loss": 2.7035, "step": 24280 }, { "epoch": 0.7397870380703178, "grad_norm": 35.0, "learning_rate": 5.0136861762225153e-05, "loss": 2.7298, "step": 24290 }, { "epoch": 0.7400916025157975, "grad_norm": 31.875, "learning_rate": 5.0070567228179154e-05, "loss": 2.7496, "step": 24300 }, { "epoch": 0.7403961669612773, "grad_norm": 31.75, "learning_rate": 5.0004331063467294e-05, "loss": 2.7256, "step": 24310 }, { "epoch": 0.740700731406757, "grad_norm": 31.625, "learning_rate": 4.993815333260139e-05, "loss": 2.7365, "step": 24320 }, { "epoch": 0.7410052958522367, "grad_norm": 31.75, "learning_rate": 4.987203410003649e-05, "loss": 2.722, "step": 24330 }, { "epoch": 0.7413098602977165, "grad_norm": 33.25, "learning_rate": 4.980597343017057e-05, "loss": 2.7344, "step": 24340 }, { "epoch": 0.7416144247431963, "grad_norm": 31.75, "learning_rate": 4.973997138734454e-05, "loss": 2.7216, "step": 24350 }, { "epoch": 0.741918989188676, "grad_norm": 33.25, "learning_rate": 4.967402803584232e-05, "loss": 2.7089, "step": 24360 }, { "epoch": 0.7422235536341557, "grad_norm": 31.625, "learning_rate": 4.9608143439890564e-05, "loss": 2.7228, "step": 24370 }, { "epoch": 0.7425281180796355, "grad_norm": 32.25, "learning_rate": 4.954231766365879e-05, "loss": 2.7177, "step": 24380 }, { "epoch": 0.7428326825251153, "grad_norm": 31.125, "learning_rate": 4.947655077125915e-05, "loss": 2.7362, "step": 24390 }, { "epoch": 0.743137246970595, "grad_norm": 32.5, "learning_rate": 4.941084282674646e-05, "loss": 2.7171, "step": 24400 }, { "epoch": 0.7434418114160748, "grad_norm": 31.375, "learning_rate": 4.93451938941182e-05, "loss": 2.7252, "step": 24410 }, { "epoch": 0.7437463758615546, "grad_norm": 32.25, "learning_rate": 4.927960403731422e-05, "loss": 2.7095, "step": 24420 }, { "epoch": 0.7440509403070342, "grad_norm": 32.0, "learning_rate": 4.921407332021701e-05, "loss": 2.7125, "step": 24430 }, { "epoch": 0.744355504752514, "grad_norm": 32.5, "learning_rate": 4.9148601806651344e-05, "loss": 2.7178, "step": 24440 }, { "epoch": 0.7446600691979938, "grad_norm": 31.375, "learning_rate": 4.908318956038432e-05, "loss": 2.7002, "step": 24450 }, { "epoch": 0.7449646336434735, "grad_norm": 31.875, "learning_rate": 4.901783664512544e-05, "loss": 2.6994, "step": 24460 }, { "epoch": 0.7452691980889533, "grad_norm": 31.5, "learning_rate": 4.895254312452629e-05, "loss": 2.7136, "step": 24470 }, { "epoch": 0.7455737625344331, "grad_norm": 32.25, "learning_rate": 4.888730906218062e-05, "loss": 2.6963, "step": 24480 }, { "epoch": 0.7458783269799127, "grad_norm": 30.625, "learning_rate": 4.882213452162435e-05, "loss": 2.7643, "step": 24490 }, { "epoch": 0.7461828914253925, "grad_norm": 33.0, "learning_rate": 4.875701956633536e-05, "loss": 2.704, "step": 24500 }, { "epoch": 0.7464874558708723, "grad_norm": 32.75, "learning_rate": 4.8691964259733535e-05, "loss": 2.745, "step": 24510 }, { "epoch": 0.7467920203163521, "grad_norm": 31.375, "learning_rate": 4.862696866518064e-05, "loss": 2.7273, "step": 24520 }, { "epoch": 0.7470965847618318, "grad_norm": 31.75, "learning_rate": 4.856203284598026e-05, "loss": 2.7099, "step": 24530 }, { "epoch": 0.7474011492073116, "grad_norm": 31.75, "learning_rate": 4.849715686537782e-05, "loss": 2.7358, "step": 24540 }, { "epoch": 0.7477057136527913, "grad_norm": 31.0, "learning_rate": 4.843234078656039e-05, "loss": 2.7087, "step": 24550 }, { "epoch": 0.748010278098271, "grad_norm": 31.375, "learning_rate": 4.836758467265679e-05, "loss": 2.7087, "step": 24560 }, { "epoch": 0.7483148425437508, "grad_norm": 31.75, "learning_rate": 4.8302888586737346e-05, "loss": 2.7082, "step": 24570 }, { "epoch": 0.7486194069892306, "grad_norm": 31.75, "learning_rate": 4.8238252591813996e-05, "loss": 2.6933, "step": 24580 }, { "epoch": 0.7489239714347103, "grad_norm": 32.0, "learning_rate": 4.8173676750840095e-05, "loss": 2.7023, "step": 24590 }, { "epoch": 0.74922853588019, "grad_norm": 33.0, "learning_rate": 4.8109161126710406e-05, "loss": 2.7269, "step": 24600 }, { "epoch": 0.7495331003256698, "grad_norm": 31.75, "learning_rate": 4.8044705782261095e-05, "loss": 2.7295, "step": 24610 }, { "epoch": 0.7498376647711495, "grad_norm": 32.0, "learning_rate": 4.798031078026957e-05, "loss": 2.7186, "step": 24620 }, { "epoch": 0.7501422292166293, "grad_norm": 32.75, "learning_rate": 4.791597618345453e-05, "loss": 2.7521, "step": 24630 }, { "epoch": 0.7504467936621091, "grad_norm": 31.0, "learning_rate": 4.7851702054475745e-05, "loss": 2.7672, "step": 24640 }, { "epoch": 0.7507513581075889, "grad_norm": 30.75, "learning_rate": 4.7787488455934135e-05, "loss": 2.7024, "step": 24650 }, { "epoch": 0.7510559225530685, "grad_norm": 31.375, "learning_rate": 4.7723335450371735e-05, "loss": 2.7457, "step": 24660 }, { "epoch": 0.7513604869985483, "grad_norm": 31.0, "learning_rate": 4.765924310027146e-05, "loss": 2.695, "step": 24670 }, { "epoch": 0.7516650514440281, "grad_norm": 31.0, "learning_rate": 4.759521146805718e-05, "loss": 2.713, "step": 24680 }, { "epoch": 0.7519696158895078, "grad_norm": 31.125, "learning_rate": 4.753124061609368e-05, "loss": 2.7479, "step": 24690 }, { "epoch": 0.7522741803349876, "grad_norm": 32.75, "learning_rate": 4.746733060668645e-05, "loss": 2.7587, "step": 24700 }, { "epoch": 0.7525787447804674, "grad_norm": 31.25, "learning_rate": 4.7403481502081845e-05, "loss": 2.7401, "step": 24710 }, { "epoch": 0.752883309225947, "grad_norm": 32.5, "learning_rate": 4.733969336446679e-05, "loss": 2.7009, "step": 24720 }, { "epoch": 0.7531878736714268, "grad_norm": 32.5, "learning_rate": 4.727596625596886e-05, "loss": 2.7284, "step": 24730 }, { "epoch": 0.7534924381169066, "grad_norm": 31.75, "learning_rate": 4.721230023865625e-05, "loss": 2.7221, "step": 24740 }, { "epoch": 0.7537970025623864, "grad_norm": 33.5, "learning_rate": 4.714869537453755e-05, "loss": 2.7382, "step": 24750 }, { "epoch": 0.7541015670078661, "grad_norm": 32.75, "learning_rate": 4.708515172556191e-05, "loss": 2.7215, "step": 24760 }, { "epoch": 0.7544061314533459, "grad_norm": 31.875, "learning_rate": 4.702166935361876e-05, "loss": 2.7648, "step": 24770 }, { "epoch": 0.7547106958988256, "grad_norm": 31.5, "learning_rate": 4.695824832053786e-05, "loss": 2.6935, "step": 24780 }, { "epoch": 0.7550152603443053, "grad_norm": 31.875, "learning_rate": 4.6894888688089286e-05, "loss": 2.7036, "step": 24790 }, { "epoch": 0.7553198247897851, "grad_norm": 33.25, "learning_rate": 4.683159051798325e-05, "loss": 2.7158, "step": 24800 }, { "epoch": 0.7556243892352649, "grad_norm": 30.875, "learning_rate": 4.6768353871870165e-05, "loss": 2.7189, "step": 24810 }, { "epoch": 0.7559289536807446, "grad_norm": 34.0, "learning_rate": 4.6705178811340464e-05, "loss": 2.7237, "step": 24820 }, { "epoch": 0.7562335181262243, "grad_norm": 31.375, "learning_rate": 4.6642065397924575e-05, "loss": 2.7019, "step": 24830 }, { "epoch": 0.7565380825717041, "grad_norm": 31.875, "learning_rate": 4.6579013693093e-05, "loss": 2.7112, "step": 24840 }, { "epoch": 0.7568426470171838, "grad_norm": 32.0, "learning_rate": 4.651602375825603e-05, "loss": 2.7413, "step": 24850 }, { "epoch": 0.7571472114626636, "grad_norm": 32.5, "learning_rate": 4.645309565476379e-05, "loss": 2.7435, "step": 24860 }, { "epoch": 0.7574517759081434, "grad_norm": 30.875, "learning_rate": 4.63902294439063e-05, "loss": 2.7213, "step": 24870 }, { "epoch": 0.7577563403536232, "grad_norm": 32.25, "learning_rate": 4.632742518691316e-05, "loss": 2.7612, "step": 24880 }, { "epoch": 0.7580609047991028, "grad_norm": 32.5, "learning_rate": 4.626468294495374e-05, "loss": 2.7268, "step": 24890 }, { "epoch": 0.7583654692445826, "grad_norm": 32.25, "learning_rate": 4.6202002779136936e-05, "loss": 2.751, "step": 24900 }, { "epoch": 0.7586700336900624, "grad_norm": 31.125, "learning_rate": 4.6139384750511175e-05, "loss": 2.7371, "step": 24910 }, { "epoch": 0.7589745981355421, "grad_norm": 32.75, "learning_rate": 4.6076828920064456e-05, "loss": 2.7022, "step": 24920 }, { "epoch": 0.7592791625810219, "grad_norm": 32.5, "learning_rate": 4.60143353487241e-05, "loss": 2.7309, "step": 24930 }, { "epoch": 0.7595837270265017, "grad_norm": 31.25, "learning_rate": 4.595190409735685e-05, "loss": 2.7176, "step": 24940 }, { "epoch": 0.7598882914719813, "grad_norm": 33.25, "learning_rate": 4.588953522676869e-05, "loss": 2.7415, "step": 24950 }, { "epoch": 0.7601928559174611, "grad_norm": 31.625, "learning_rate": 4.582722879770495e-05, "loss": 2.7208, "step": 24960 }, { "epoch": 0.7604974203629409, "grad_norm": 33.0, "learning_rate": 4.5764984870850046e-05, "loss": 2.7398, "step": 24970 }, { "epoch": 0.7608019848084207, "grad_norm": 32.0, "learning_rate": 4.570280350682753e-05, "loss": 2.7028, "step": 24980 }, { "epoch": 0.7611065492539004, "grad_norm": 30.5, "learning_rate": 4.5640684766200106e-05, "loss": 2.7296, "step": 24990 }, { "epoch": 0.7614111136993802, "grad_norm": 31.75, "learning_rate": 4.557862870946936e-05, "loss": 2.7215, "step": 25000 }, { "epoch": 0.7617156781448599, "grad_norm": 31.25, "learning_rate": 4.551663539707594e-05, "loss": 2.7383, "step": 25010 }, { "epoch": 0.7620202425903396, "grad_norm": 32.75, "learning_rate": 4.545470488939933e-05, "loss": 2.7193, "step": 25020 }, { "epoch": 0.7623248070358194, "grad_norm": 31.875, "learning_rate": 4.539283724675778e-05, "loss": 2.731, "step": 25030 }, { "epoch": 0.7626293714812992, "grad_norm": 32.25, "learning_rate": 4.5331032529408465e-05, "loss": 2.714, "step": 25040 }, { "epoch": 0.7629339359267789, "grad_norm": 31.625, "learning_rate": 4.5269290797547144e-05, "loss": 2.7424, "step": 25050 }, { "epoch": 0.7632385003722586, "grad_norm": 32.75, "learning_rate": 4.520761211130825e-05, "loss": 2.726, "step": 25060 }, { "epoch": 0.7635430648177384, "grad_norm": 31.875, "learning_rate": 4.5145996530764885e-05, "loss": 2.72, "step": 25070 }, { "epoch": 0.7638476292632181, "grad_norm": 31.75, "learning_rate": 4.508444411592859e-05, "loss": 2.7397, "step": 25080 }, { "epoch": 0.7641521937086979, "grad_norm": 30.875, "learning_rate": 4.502295492674947e-05, "loss": 2.7276, "step": 25090 }, { "epoch": 0.7644567581541777, "grad_norm": 33.0, "learning_rate": 4.496152902311599e-05, "loss": 2.7303, "step": 25100 }, { "epoch": 0.7647613225996575, "grad_norm": 31.625, "learning_rate": 4.490016646485499e-05, "loss": 2.7167, "step": 25110 }, { "epoch": 0.7650658870451371, "grad_norm": 32.75, "learning_rate": 4.483886731173165e-05, "loss": 2.7299, "step": 25120 }, { "epoch": 0.7653704514906169, "grad_norm": 32.5, "learning_rate": 4.4777631623449325e-05, "loss": 2.7484, "step": 25130 }, { "epoch": 0.7656750159360967, "grad_norm": 33.0, "learning_rate": 4.471645945964965e-05, "loss": 2.7368, "step": 25140 }, { "epoch": 0.7659795803815764, "grad_norm": 32.0, "learning_rate": 4.4655350879912316e-05, "loss": 2.7082, "step": 25150 }, { "epoch": 0.7662841448270562, "grad_norm": 32.75, "learning_rate": 4.459430594375509e-05, "loss": 2.7003, "step": 25160 }, { "epoch": 0.766588709272536, "grad_norm": 32.0, "learning_rate": 4.4533324710633804e-05, "loss": 2.7069, "step": 25170 }, { "epoch": 0.7668932737180156, "grad_norm": 31.0, "learning_rate": 4.4472407239942205e-05, "loss": 2.7187, "step": 25180 }, { "epoch": 0.7671978381634954, "grad_norm": 32.25, "learning_rate": 4.44115535910119e-05, "loss": 2.7551, "step": 25190 }, { "epoch": 0.7675024026089752, "grad_norm": 33.0, "learning_rate": 4.435076382311244e-05, "loss": 2.7165, "step": 25200 }, { "epoch": 0.767806967054455, "grad_norm": 30.5, "learning_rate": 4.4290037995451055e-05, "loss": 2.7093, "step": 25210 }, { "epoch": 0.7681115314999347, "grad_norm": 31.625, "learning_rate": 4.422937616717278e-05, "loss": 2.7162, "step": 25220 }, { "epoch": 0.7684160959454145, "grad_norm": 31.25, "learning_rate": 4.4168778397360255e-05, "loss": 2.7367, "step": 25230 }, { "epoch": 0.7687206603908942, "grad_norm": 31.375, "learning_rate": 4.4108244745033734e-05, "loss": 2.7049, "step": 25240 }, { "epoch": 0.7690252248363739, "grad_norm": 32.0, "learning_rate": 4.4047775269151096e-05, "loss": 2.7589, "step": 25250 }, { "epoch": 0.7693297892818537, "grad_norm": 31.125, "learning_rate": 4.3987370028607585e-05, "loss": 2.6909, "step": 25260 }, { "epoch": 0.7696343537273335, "grad_norm": 30.625, "learning_rate": 4.392702908223604e-05, "loss": 2.7001, "step": 25270 }, { "epoch": 0.7699389181728132, "grad_norm": 30.5, "learning_rate": 4.3866752488806546e-05, "loss": 2.6919, "step": 25280 }, { "epoch": 0.7702434826182929, "grad_norm": 36.5, "learning_rate": 4.3806540307026544e-05, "loss": 2.7083, "step": 25290 }, { "epoch": 0.7705480470637727, "grad_norm": 32.5, "learning_rate": 4.374639259554081e-05, "loss": 2.7306, "step": 25300 }, { "epoch": 0.7708526115092524, "grad_norm": 31.25, "learning_rate": 4.368630941293121e-05, "loss": 2.7183, "step": 25310 }, { "epoch": 0.7711571759547322, "grad_norm": 31.375, "learning_rate": 4.362629081771693e-05, "loss": 2.7153, "step": 25320 }, { "epoch": 0.771461740400212, "grad_norm": 32.0, "learning_rate": 4.356633686835402e-05, "loss": 2.7152, "step": 25330 }, { "epoch": 0.7717663048456918, "grad_norm": 32.5, "learning_rate": 4.350644762323578e-05, "loss": 2.6999, "step": 25340 }, { "epoch": 0.7720708692911714, "grad_norm": 30.625, "learning_rate": 4.3446623140692364e-05, "loss": 2.6866, "step": 25350 }, { "epoch": 0.7723754337366512, "grad_norm": 31.5, "learning_rate": 4.338686347899086e-05, "loss": 2.7458, "step": 25360 }, { "epoch": 0.772679998182131, "grad_norm": 32.5, "learning_rate": 4.332716869633533e-05, "loss": 2.7508, "step": 25370 }, { "epoch": 0.7729845626276107, "grad_norm": 33.25, "learning_rate": 4.3267538850866506e-05, "loss": 2.6453, "step": 25380 }, { "epoch": 0.7732891270730905, "grad_norm": 33.0, "learning_rate": 4.3207974000661945e-05, "loss": 2.7197, "step": 25390 }, { "epoch": 0.7735936915185703, "grad_norm": 32.5, "learning_rate": 4.314847420373591e-05, "loss": 2.7436, "step": 25400 }, { "epoch": 0.7738982559640499, "grad_norm": 33.0, "learning_rate": 4.3089039518039245e-05, "loss": 2.7132, "step": 25410 }, { "epoch": 0.7742028204095297, "grad_norm": 32.5, "learning_rate": 4.302967000145947e-05, "loss": 2.7448, "step": 25420 }, { "epoch": 0.7745073848550095, "grad_norm": 37.0, "learning_rate": 4.297036571182053e-05, "loss": 2.7071, "step": 25430 }, { "epoch": 0.7748119493004892, "grad_norm": 33.0, "learning_rate": 4.2911126706882904e-05, "loss": 2.7229, "step": 25440 }, { "epoch": 0.775116513745969, "grad_norm": 33.25, "learning_rate": 4.28519530443435e-05, "loss": 2.7108, "step": 25450 }, { "epoch": 0.7754210781914488, "grad_norm": 32.25, "learning_rate": 4.279284478183551e-05, "loss": 2.729, "step": 25460 }, { "epoch": 0.7757256426369284, "grad_norm": 32.0, "learning_rate": 4.273380197692851e-05, "loss": 2.7298, "step": 25470 }, { "epoch": 0.7760302070824082, "grad_norm": 31.25, "learning_rate": 4.267482468712828e-05, "loss": 2.6968, "step": 25480 }, { "epoch": 0.776334771527888, "grad_norm": 31.5, "learning_rate": 4.261591296987677e-05, "loss": 2.6865, "step": 25490 }, { "epoch": 0.7766393359733678, "grad_norm": 31.625, "learning_rate": 4.2557066882552135e-05, "loss": 2.6962, "step": 25500 }, { "epoch": 0.7769439004188475, "grad_norm": 31.0, "learning_rate": 4.249828648246852e-05, "loss": 2.6979, "step": 25510 }, { "epoch": 0.7772484648643272, "grad_norm": 31.375, "learning_rate": 4.243957182687619e-05, "loss": 2.7446, "step": 25520 }, { "epoch": 0.777553029309807, "grad_norm": 31.375, "learning_rate": 4.238092297296131e-05, "loss": 2.7185, "step": 25530 }, { "epoch": 0.7778575937552867, "grad_norm": 32.25, "learning_rate": 4.232233997784592e-05, "loss": 2.7776, "step": 25540 }, { "epoch": 0.7781621582007665, "grad_norm": 32.75, "learning_rate": 4.2263822898588035e-05, "loss": 2.7247, "step": 25550 }, { "epoch": 0.7784667226462463, "grad_norm": 31.5, "learning_rate": 4.220537179218138e-05, "loss": 2.7257, "step": 25560 }, { "epoch": 0.778771287091726, "grad_norm": 31.5, "learning_rate": 4.214698671555541e-05, "loss": 2.7322, "step": 25570 }, { "epoch": 0.7790758515372057, "grad_norm": 31.875, "learning_rate": 4.208866772557537e-05, "loss": 2.7585, "step": 25580 }, { "epoch": 0.7793804159826855, "grad_norm": 31.125, "learning_rate": 4.203041487904201e-05, "loss": 2.7285, "step": 25590 }, { "epoch": 0.7796849804281653, "grad_norm": 31.625, "learning_rate": 4.197222823269179e-05, "loss": 2.6832, "step": 25600 }, { "epoch": 0.779989544873645, "grad_norm": 32.0, "learning_rate": 4.1914107843196584e-05, "loss": 2.7204, "step": 25610 }, { "epoch": 0.7802941093191248, "grad_norm": 32.5, "learning_rate": 4.185605376716376e-05, "loss": 2.7198, "step": 25620 }, { "epoch": 0.7805986737646046, "grad_norm": 33.0, "learning_rate": 4.179806606113618e-05, "loss": 2.7378, "step": 25630 }, { "epoch": 0.7809032382100842, "grad_norm": 31.5, "learning_rate": 4.174014478159192e-05, "loss": 2.7203, "step": 25640 }, { "epoch": 0.781207802655564, "grad_norm": 30.75, "learning_rate": 4.16822899849445e-05, "loss": 2.7329, "step": 25650 }, { "epoch": 0.7815123671010438, "grad_norm": 31.125, "learning_rate": 4.16245017275426e-05, "loss": 2.7248, "step": 25660 }, { "epoch": 0.7818169315465235, "grad_norm": 31.875, "learning_rate": 4.156678006567011e-05, "loss": 2.7242, "step": 25670 }, { "epoch": 0.7821214959920033, "grad_norm": 31.75, "learning_rate": 4.150912505554608e-05, "loss": 2.734, "step": 25680 }, { "epoch": 0.7824260604374831, "grad_norm": 32.75, "learning_rate": 4.1451536753324614e-05, "loss": 2.7584, "step": 25690 }, { "epoch": 0.7827306248829627, "grad_norm": 32.75, "learning_rate": 4.1394015215094894e-05, "loss": 2.7006, "step": 25700 }, { "epoch": 0.7830351893284425, "grad_norm": 33.25, "learning_rate": 4.133656049688103e-05, "loss": 2.7276, "step": 25710 }, { "epoch": 0.7833397537739223, "grad_norm": 31.875, "learning_rate": 4.127917265464205e-05, "loss": 2.6876, "step": 25720 }, { "epoch": 0.7836443182194021, "grad_norm": 32.25, "learning_rate": 4.1221851744271864e-05, "loss": 2.7328, "step": 25730 }, { "epoch": 0.7839488826648818, "grad_norm": 31.875, "learning_rate": 4.116459782159917e-05, "loss": 2.7139, "step": 25740 }, { "epoch": 0.7842534471103615, "grad_norm": 32.0, "learning_rate": 4.1107410942387484e-05, "loss": 2.7265, "step": 25750 }, { "epoch": 0.7845580115558413, "grad_norm": 32.25, "learning_rate": 4.105029116233496e-05, "loss": 2.7209, "step": 25760 }, { "epoch": 0.784862576001321, "grad_norm": 32.25, "learning_rate": 4.099323853707441e-05, "loss": 2.7313, "step": 25770 }, { "epoch": 0.7851671404468008, "grad_norm": 32.0, "learning_rate": 4.0936253122173276e-05, "loss": 2.7369, "step": 25780 }, { "epoch": 0.7854717048922806, "grad_norm": 32.75, "learning_rate": 4.0879334973133466e-05, "loss": 2.7627, "step": 25790 }, { "epoch": 0.7857762693377603, "grad_norm": 33.25, "learning_rate": 4.082248414539149e-05, "loss": 2.7009, "step": 25800 }, { "epoch": 0.78608083378324, "grad_norm": 32.0, "learning_rate": 4.076570069431819e-05, "loss": 2.703, "step": 25810 }, { "epoch": 0.7863853982287198, "grad_norm": 32.0, "learning_rate": 4.070898467521877e-05, "loss": 2.7321, "step": 25820 }, { "epoch": 0.7866899626741995, "grad_norm": 32.5, "learning_rate": 4.065233614333288e-05, "loss": 2.6775, "step": 25830 }, { "epoch": 0.7869945271196793, "grad_norm": 32.25, "learning_rate": 4.0595755153834294e-05, "loss": 2.7229, "step": 25840 }, { "epoch": 0.7872990915651591, "grad_norm": 32.0, "learning_rate": 4.0539241761831135e-05, "loss": 2.7054, "step": 25850 }, { "epoch": 0.7876036560106389, "grad_norm": 31.5, "learning_rate": 4.0482796022365573e-05, "loss": 2.7328, "step": 25860 }, { "epoch": 0.7879082204561185, "grad_norm": 31.875, "learning_rate": 4.0426417990413944e-05, "loss": 2.726, "step": 25870 }, { "epoch": 0.7882127849015983, "grad_norm": 30.875, "learning_rate": 4.037010772088667e-05, "loss": 2.6886, "step": 25880 }, { "epoch": 0.7885173493470781, "grad_norm": 31.25, "learning_rate": 4.031386526862808e-05, "loss": 2.7086, "step": 25890 }, { "epoch": 0.7888219137925578, "grad_norm": 32.75, "learning_rate": 4.0257690688416573e-05, "loss": 2.7082, "step": 25900 }, { "epoch": 0.7891264782380376, "grad_norm": 32.5, "learning_rate": 4.0201584034964335e-05, "loss": 2.767, "step": 25910 }, { "epoch": 0.7894310426835174, "grad_norm": 30.625, "learning_rate": 4.0145545362917406e-05, "loss": 2.6906, "step": 25920 }, { "epoch": 0.789735607128997, "grad_norm": 32.25, "learning_rate": 4.008957472685572e-05, "loss": 2.723, "step": 25930 }, { "epoch": 0.7900401715744768, "grad_norm": 33.0, "learning_rate": 4.003367218129284e-05, "loss": 2.7263, "step": 25940 }, { "epoch": 0.7903447360199566, "grad_norm": 30.75, "learning_rate": 3.997783778067601e-05, "loss": 2.707, "step": 25950 }, { "epoch": 0.7906493004654364, "grad_norm": 33.0, "learning_rate": 3.9922071579386165e-05, "loss": 2.7352, "step": 25960 }, { "epoch": 0.7909538649109161, "grad_norm": 32.25, "learning_rate": 3.986637363173777e-05, "loss": 2.7256, "step": 25970 }, { "epoch": 0.7912584293563958, "grad_norm": 31.75, "learning_rate": 3.981074399197887e-05, "loss": 2.7328, "step": 25980 }, { "epoch": 0.7915629938018756, "grad_norm": 30.25, "learning_rate": 3.97551827142909e-05, "loss": 2.7146, "step": 25990 }, { "epoch": 0.7918675582473553, "grad_norm": 32.5, "learning_rate": 3.969968985278875e-05, "loss": 2.7048, "step": 26000 }, { "epoch": 0.7921721226928351, "grad_norm": 31.5, "learning_rate": 3.96442654615207e-05, "loss": 2.6922, "step": 26010 }, { "epoch": 0.7924766871383149, "grad_norm": 32.5, "learning_rate": 3.9588909594468296e-05, "loss": 2.718, "step": 26020 }, { "epoch": 0.7927812515837946, "grad_norm": 33.25, "learning_rate": 3.953362230554639e-05, "loss": 2.7553, "step": 26030 }, { "epoch": 0.7930858160292743, "grad_norm": 32.5, "learning_rate": 3.947840364860301e-05, "loss": 2.7334, "step": 26040 }, { "epoch": 0.7933903804747541, "grad_norm": 32.75, "learning_rate": 3.942325367741933e-05, "loss": 2.721, "step": 26050 }, { "epoch": 0.7936949449202338, "grad_norm": 32.25, "learning_rate": 3.9368172445709676e-05, "loss": 2.6968, "step": 26060 }, { "epoch": 0.7939995093657136, "grad_norm": 31.375, "learning_rate": 3.931316000712135e-05, "loss": 2.7076, "step": 26070 }, { "epoch": 0.7943040738111934, "grad_norm": 30.75, "learning_rate": 3.9258216415234714e-05, "loss": 2.7347, "step": 26080 }, { "epoch": 0.7946086382566732, "grad_norm": 30.75, "learning_rate": 3.920334172356305e-05, "loss": 2.7282, "step": 26090 }, { "epoch": 0.7949132027021528, "grad_norm": 32.25, "learning_rate": 3.9148535985552537e-05, "loss": 2.7369, "step": 26100 }, { "epoch": 0.7952177671476326, "grad_norm": 32.75, "learning_rate": 3.9093799254582216e-05, "loss": 2.735, "step": 26110 }, { "epoch": 0.7955223315931124, "grad_norm": 31.75, "learning_rate": 3.9039131583963875e-05, "loss": 2.7123, "step": 26120 }, { "epoch": 0.7958268960385921, "grad_norm": 32.25, "learning_rate": 3.8984533026942074e-05, "loss": 2.7177, "step": 26130 }, { "epoch": 0.7961314604840719, "grad_norm": 31.75, "learning_rate": 3.8930003636694036e-05, "loss": 2.7096, "step": 26140 }, { "epoch": 0.7964360249295517, "grad_norm": 32.0, "learning_rate": 3.8875543466329607e-05, "loss": 2.733, "step": 26150 }, { "epoch": 0.7967405893750313, "grad_norm": 33.25, "learning_rate": 3.882115256889127e-05, "loss": 2.7196, "step": 26160 }, { "epoch": 0.7970451538205111, "grad_norm": 31.875, "learning_rate": 3.876683099735398e-05, "loss": 2.7223, "step": 26170 }, { "epoch": 0.7973497182659909, "grad_norm": 31.375, "learning_rate": 3.871257880462523e-05, "loss": 2.7098, "step": 26180 }, { "epoch": 0.7976542827114707, "grad_norm": 32.25, "learning_rate": 3.865839604354489e-05, "loss": 2.7298, "step": 26190 }, { "epoch": 0.7979588471569504, "grad_norm": 33.0, "learning_rate": 3.860428276688519e-05, "loss": 2.7222, "step": 26200 }, { "epoch": 0.7982634116024301, "grad_norm": 32.75, "learning_rate": 3.855023902735076e-05, "loss": 2.7219, "step": 26210 }, { "epoch": 0.7985679760479099, "grad_norm": 31.75, "learning_rate": 3.849626487757841e-05, "loss": 2.7091, "step": 26220 }, { "epoch": 0.7988725404933896, "grad_norm": 31.0, "learning_rate": 3.844236037013728e-05, "loss": 2.7207, "step": 26230 }, { "epoch": 0.7991771049388694, "grad_norm": 31.75, "learning_rate": 3.83885255575286e-05, "loss": 2.7209, "step": 26240 }, { "epoch": 0.7994816693843492, "grad_norm": 32.5, "learning_rate": 3.8334760492185694e-05, "loss": 2.767, "step": 26250 }, { "epoch": 0.7997862338298289, "grad_norm": 32.0, "learning_rate": 3.828106522647408e-05, "loss": 2.7499, "step": 26260 }, { "epoch": 0.8000907982753086, "grad_norm": 31.25, "learning_rate": 3.8227439812691154e-05, "loss": 2.7386, "step": 26270 }, { "epoch": 0.8003953627207884, "grad_norm": 32.5, "learning_rate": 3.817388430306634e-05, "loss": 2.7334, "step": 26280 }, { "epoch": 0.8006999271662681, "grad_norm": 31.25, "learning_rate": 3.812039874976103e-05, "loss": 2.7202, "step": 26290 }, { "epoch": 0.8010044916117479, "grad_norm": 33.75, "learning_rate": 3.806698320486837e-05, "loss": 2.7385, "step": 26300 }, { "epoch": 0.8013090560572277, "grad_norm": 31.875, "learning_rate": 3.8013637720413406e-05, "loss": 2.7031, "step": 26310 }, { "epoch": 0.8016136205027075, "grad_norm": 31.125, "learning_rate": 3.796036234835293e-05, "loss": 2.7491, "step": 26320 }, { "epoch": 0.8019181849481871, "grad_norm": 31.875, "learning_rate": 3.790715714057538e-05, "loss": 2.7058, "step": 26330 }, { "epoch": 0.8022227493936669, "grad_norm": 32.75, "learning_rate": 3.7854022148900986e-05, "loss": 2.7456, "step": 26340 }, { "epoch": 0.8025273138391467, "grad_norm": 31.25, "learning_rate": 3.7800957425081454e-05, "loss": 2.6949, "step": 26350 }, { "epoch": 0.8028318782846264, "grad_norm": 32.25, "learning_rate": 3.7747963020800165e-05, "loss": 2.7378, "step": 26360 }, { "epoch": 0.8031364427301062, "grad_norm": 31.875, "learning_rate": 3.769503898767193e-05, "loss": 2.7426, "step": 26370 }, { "epoch": 0.803441007175586, "grad_norm": 33.0, "learning_rate": 3.7642185377243026e-05, "loss": 2.7284, "step": 26380 }, { "epoch": 0.8037455716210656, "grad_norm": 31.5, "learning_rate": 3.758940224099122e-05, "loss": 2.7118, "step": 26390 }, { "epoch": 0.8040501360665454, "grad_norm": 35.75, "learning_rate": 3.753668963032553e-05, "loss": 2.7504, "step": 26400 }, { "epoch": 0.8043547005120252, "grad_norm": 31.125, "learning_rate": 3.748404759658639e-05, "loss": 2.7081, "step": 26410 }, { "epoch": 0.804659264957505, "grad_norm": 32.0, "learning_rate": 3.743147619104541e-05, "loss": 2.718, "step": 26420 }, { "epoch": 0.8049638294029847, "grad_norm": 31.75, "learning_rate": 3.737897546490543e-05, "loss": 2.7336, "step": 26430 }, { "epoch": 0.8052683938484644, "grad_norm": 32.5, "learning_rate": 3.7326545469300514e-05, "loss": 2.7111, "step": 26440 }, { "epoch": 0.8055729582939442, "grad_norm": 30.875, "learning_rate": 3.727418625529575e-05, "loss": 2.7284, "step": 26450 }, { "epoch": 0.8058775227394239, "grad_norm": 31.5, "learning_rate": 3.722189787388729e-05, "loss": 2.7433, "step": 26460 }, { "epoch": 0.8061820871849037, "grad_norm": 38.75, "learning_rate": 3.716968037600241e-05, "loss": 2.7075, "step": 26470 }, { "epoch": 0.8064866516303835, "grad_norm": 32.5, "learning_rate": 3.7117533812499206e-05, "loss": 2.7077, "step": 26480 }, { "epoch": 0.8067912160758632, "grad_norm": 32.25, "learning_rate": 3.706545823416678e-05, "loss": 2.708, "step": 26490 }, { "epoch": 0.8070957805213429, "grad_norm": 31.5, "learning_rate": 3.701345369172505e-05, "loss": 2.7277, "step": 26500 }, { "epoch": 0.8074003449668227, "grad_norm": 32.25, "learning_rate": 3.6961520235824784e-05, "loss": 2.7117, "step": 26510 }, { "epoch": 0.8077049094123024, "grad_norm": 32.0, "learning_rate": 3.690965791704747e-05, "loss": 2.7293, "step": 26520 }, { "epoch": 0.8080094738577822, "grad_norm": 31.75, "learning_rate": 3.6857866785905316e-05, "loss": 2.7028, "step": 26530 }, { "epoch": 0.808314038303262, "grad_norm": 32.75, "learning_rate": 3.680614689284127e-05, "loss": 2.6893, "step": 26540 }, { "epoch": 0.8086186027487418, "grad_norm": 31.125, "learning_rate": 3.675449828822877e-05, "loss": 2.7456, "step": 26550 }, { "epoch": 0.8089231671942214, "grad_norm": 31.75, "learning_rate": 3.670292102237195e-05, "loss": 2.6901, "step": 26560 }, { "epoch": 0.8092277316397012, "grad_norm": 32.75, "learning_rate": 3.6651415145505373e-05, "loss": 2.7272, "step": 26570 }, { "epoch": 0.809532296085181, "grad_norm": 30.75, "learning_rate": 3.65999807077941e-05, "loss": 2.6793, "step": 26580 }, { "epoch": 0.8098368605306607, "grad_norm": 32.0, "learning_rate": 3.654861775933363e-05, "loss": 2.7354, "step": 26590 }, { "epoch": 0.8101414249761405, "grad_norm": 31.0, "learning_rate": 3.6497326350149785e-05, "loss": 2.6886, "step": 26600 }, { "epoch": 0.8104459894216203, "grad_norm": 32.75, "learning_rate": 3.64461065301988e-05, "loss": 2.738, "step": 26610 }, { "epoch": 0.8107505538670999, "grad_norm": 31.75, "learning_rate": 3.6394958349367106e-05, "loss": 2.7232, "step": 26620 }, { "epoch": 0.8110551183125797, "grad_norm": 30.875, "learning_rate": 3.634388185747132e-05, "loss": 2.7091, "step": 26630 }, { "epoch": 0.8113596827580595, "grad_norm": 31.875, "learning_rate": 3.6292877104258395e-05, "loss": 2.7505, "step": 26640 }, { "epoch": 0.8116642472035392, "grad_norm": 33.0, "learning_rate": 3.6241944139405255e-05, "loss": 2.7321, "step": 26650 }, { "epoch": 0.811968811649019, "grad_norm": 32.5, "learning_rate": 3.6191083012518944e-05, "loss": 2.739, "step": 26660 }, { "epoch": 0.8122733760944987, "grad_norm": 32.5, "learning_rate": 3.6140293773136625e-05, "loss": 2.7218, "step": 26670 }, { "epoch": 0.8125779405399784, "grad_norm": 31.5, "learning_rate": 3.6089576470725315e-05, "loss": 2.7275, "step": 26680 }, { "epoch": 0.8128825049854582, "grad_norm": 33.75, "learning_rate": 3.60389311546821e-05, "loss": 2.6956, "step": 26690 }, { "epoch": 0.813187069430938, "grad_norm": 33.25, "learning_rate": 3.598835787433382e-05, "loss": 2.7075, "step": 26700 }, { "epoch": 0.8134916338764178, "grad_norm": 31.875, "learning_rate": 3.593785667893723e-05, "loss": 2.7041, "step": 26710 }, { "epoch": 0.8137961983218975, "grad_norm": 31.0, "learning_rate": 3.5887427617678914e-05, "loss": 2.7172, "step": 26720 }, { "epoch": 0.8141007627673772, "grad_norm": 32.5, "learning_rate": 3.583707073967507e-05, "loss": 2.7162, "step": 26730 }, { "epoch": 0.814405327212857, "grad_norm": 31.0, "learning_rate": 3.578678609397175e-05, "loss": 2.6887, "step": 26740 }, { "epoch": 0.8147098916583367, "grad_norm": 32.5, "learning_rate": 3.573657372954455e-05, "loss": 2.7455, "step": 26750 }, { "epoch": 0.8150144561038165, "grad_norm": 31.25, "learning_rate": 3.568643369529865e-05, "loss": 2.7109, "step": 26760 }, { "epoch": 0.8153190205492963, "grad_norm": 32.0, "learning_rate": 3.563636604006889e-05, "loss": 2.698, "step": 26770 }, { "epoch": 0.815623584994776, "grad_norm": 32.5, "learning_rate": 3.55863708126195e-05, "loss": 2.697, "step": 26780 }, { "epoch": 0.8159281494402557, "grad_norm": 31.625, "learning_rate": 3.5536448061644265e-05, "loss": 2.7099, "step": 26790 }, { "epoch": 0.8162327138857355, "grad_norm": 31.25, "learning_rate": 3.548659783576632e-05, "loss": 2.7245, "step": 26800 }, { "epoch": 0.8165372783312153, "grad_norm": 31.5, "learning_rate": 3.5436820183538135e-05, "loss": 2.7188, "step": 26810 }, { "epoch": 0.816841842776695, "grad_norm": 33.5, "learning_rate": 3.53871151534416e-05, "loss": 2.7409, "step": 26820 }, { "epoch": 0.8171464072221748, "grad_norm": 32.25, "learning_rate": 3.5337482793887765e-05, "loss": 2.747, "step": 26830 }, { "epoch": 0.8174509716676546, "grad_norm": 33.0, "learning_rate": 3.528792315321695e-05, "loss": 2.7212, "step": 26840 }, { "epoch": 0.8177555361131342, "grad_norm": 31.5, "learning_rate": 3.523843627969869e-05, "loss": 2.7189, "step": 26850 }, { "epoch": 0.818060100558614, "grad_norm": 32.75, "learning_rate": 3.5189022221531545e-05, "loss": 2.7386, "step": 26860 }, { "epoch": 0.8183646650040938, "grad_norm": 32.5, "learning_rate": 3.513968102684325e-05, "loss": 2.7046, "step": 26870 }, { "epoch": 0.8186692294495735, "grad_norm": 32.75, "learning_rate": 3.5090412743690535e-05, "loss": 2.7376, "step": 26880 }, { "epoch": 0.8189737938950533, "grad_norm": 32.75, "learning_rate": 3.5041217420059094e-05, "loss": 2.7175, "step": 26890 }, { "epoch": 0.8192783583405331, "grad_norm": 32.5, "learning_rate": 3.499209510386362e-05, "loss": 2.7163, "step": 26900 }, { "epoch": 0.8195829227860127, "grad_norm": 33.0, "learning_rate": 3.4943045842947643e-05, "loss": 2.7092, "step": 26910 }, { "epoch": 0.8198874872314925, "grad_norm": 32.25, "learning_rate": 3.489406968508357e-05, "loss": 2.7315, "step": 26920 }, { "epoch": 0.8201920516769723, "grad_norm": 31.5, "learning_rate": 3.484516667797257e-05, "loss": 2.7151, "step": 26930 }, { "epoch": 0.8204966161224521, "grad_norm": 32.0, "learning_rate": 3.479633686924463e-05, "loss": 2.6888, "step": 26940 }, { "epoch": 0.8208011805679318, "grad_norm": 31.625, "learning_rate": 3.47475803064584e-05, "loss": 2.7334, "step": 26950 }, { "epoch": 0.8211057450134115, "grad_norm": 33.0, "learning_rate": 3.469889703710118e-05, "loss": 2.7197, "step": 26960 }, { "epoch": 0.8214103094588913, "grad_norm": 31.25, "learning_rate": 3.465028710858893e-05, "loss": 2.7312, "step": 26970 }, { "epoch": 0.821714873904371, "grad_norm": 31.625, "learning_rate": 3.4601750568266145e-05, "loss": 2.721, "step": 26980 }, { "epoch": 0.8220194383498508, "grad_norm": 33.0, "learning_rate": 3.4553287463405814e-05, "loss": 2.7349, "step": 26990 }, { "epoch": 0.8223240027953306, "grad_norm": 30.75, "learning_rate": 3.45048978412095e-05, "loss": 2.701, "step": 27000 }, { "epoch": 0.8226285672408103, "grad_norm": 31.0, "learning_rate": 3.445658174880708e-05, "loss": 2.7346, "step": 27010 }, { "epoch": 0.82293313168629, "grad_norm": 32.25, "learning_rate": 3.4408339233256916e-05, "loss": 2.7179, "step": 27020 }, { "epoch": 0.8232376961317698, "grad_norm": 32.5, "learning_rate": 3.436017034154562e-05, "loss": 2.7244, "step": 27030 }, { "epoch": 0.8235422605772496, "grad_norm": 32.5, "learning_rate": 3.431207512058815e-05, "loss": 2.7197, "step": 27040 }, { "epoch": 0.8238468250227293, "grad_norm": 33.75, "learning_rate": 3.426405361722772e-05, "loss": 2.7072, "step": 27050 }, { "epoch": 0.8241513894682091, "grad_norm": 31.625, "learning_rate": 3.4216105878235696e-05, "loss": 2.7253, "step": 27060 }, { "epoch": 0.8244559539136889, "grad_norm": 31.625, "learning_rate": 3.416823195031166e-05, "loss": 2.6745, "step": 27070 }, { "epoch": 0.8247605183591685, "grad_norm": 31.0, "learning_rate": 3.4120431880083256e-05, "loss": 2.7602, "step": 27080 }, { "epoch": 0.8250650828046483, "grad_norm": 33.25, "learning_rate": 3.407270571410619e-05, "loss": 2.6955, "step": 27090 }, { "epoch": 0.8253696472501281, "grad_norm": 32.75, "learning_rate": 3.402505349886425e-05, "loss": 2.6811, "step": 27100 }, { "epoch": 0.8256742116956078, "grad_norm": 30.625, "learning_rate": 3.397747528076911e-05, "loss": 2.7248, "step": 27110 }, { "epoch": 0.8259787761410876, "grad_norm": 34.5, "learning_rate": 3.392997110616047e-05, "loss": 2.7213, "step": 27120 }, { "epoch": 0.8262833405865674, "grad_norm": 31.875, "learning_rate": 3.388254102130584e-05, "loss": 2.6961, "step": 27130 }, { "epoch": 0.826587905032047, "grad_norm": 33.0, "learning_rate": 3.3835185072400565e-05, "loss": 2.7262, "step": 27140 }, { "epoch": 0.8268924694775268, "grad_norm": 32.25, "learning_rate": 3.378790330556786e-05, "loss": 2.7479, "step": 27150 }, { "epoch": 0.8271970339230066, "grad_norm": 31.875, "learning_rate": 3.3740695766858616e-05, "loss": 2.7106, "step": 27160 }, { "epoch": 0.8275015983684864, "grad_norm": 31.75, "learning_rate": 3.3693562502251433e-05, "loss": 2.7164, "step": 27170 }, { "epoch": 0.8278061628139661, "grad_norm": 31.75, "learning_rate": 3.3646503557652644e-05, "loss": 2.7475, "step": 27180 }, { "epoch": 0.8281107272594458, "grad_norm": 31.375, "learning_rate": 3.3599518978896074e-05, "loss": 2.7001, "step": 27190 }, { "epoch": 0.8284152917049256, "grad_norm": 32.75, "learning_rate": 3.355260881174325e-05, "loss": 2.7151, "step": 27200 }, { "epoch": 0.8287198561504053, "grad_norm": 31.75, "learning_rate": 3.350577310188313e-05, "loss": 2.7421, "step": 27210 }, { "epoch": 0.8290244205958851, "grad_norm": 31.25, "learning_rate": 3.345901189493217e-05, "loss": 2.7046, "step": 27220 }, { "epoch": 0.8293289850413649, "grad_norm": 31.375, "learning_rate": 3.341232523643432e-05, "loss": 2.7127, "step": 27230 }, { "epoch": 0.8296335494868446, "grad_norm": 31.375, "learning_rate": 3.3365713171860824e-05, "loss": 2.7281, "step": 27240 }, { "epoch": 0.8299381139323243, "grad_norm": 33.5, "learning_rate": 3.331917574661039e-05, "loss": 2.7139, "step": 27250 }, { "epoch": 0.8302426783778041, "grad_norm": 32.5, "learning_rate": 3.3272713006008946e-05, "loss": 2.7106, "step": 27260 }, { "epoch": 0.8305472428232838, "grad_norm": 34.5, "learning_rate": 3.3226324995309666e-05, "loss": 2.7043, "step": 27270 }, { "epoch": 0.8308518072687636, "grad_norm": 32.75, "learning_rate": 3.318001175969303e-05, "loss": 2.72, "step": 27280 }, { "epoch": 0.8311563717142434, "grad_norm": 31.0, "learning_rate": 3.3133773344266584e-05, "loss": 2.7256, "step": 27290 }, { "epoch": 0.8314609361597232, "grad_norm": 32.75, "learning_rate": 3.308760979406512e-05, "loss": 2.7303, "step": 27300 }, { "epoch": 0.8317655006052028, "grad_norm": 32.25, "learning_rate": 3.304152115405037e-05, "loss": 2.7546, "step": 27310 }, { "epoch": 0.8320700650506826, "grad_norm": 31.625, "learning_rate": 3.299550746911124e-05, "loss": 2.7114, "step": 27320 }, { "epoch": 0.8323746294961624, "grad_norm": 32.25, "learning_rate": 3.294956878406355e-05, "loss": 2.7251, "step": 27330 }, { "epoch": 0.8326791939416421, "grad_norm": 31.375, "learning_rate": 3.290370514365008e-05, "loss": 2.7339, "step": 27340 }, { "epoch": 0.8329837583871219, "grad_norm": 32.0, "learning_rate": 3.2857916592540577e-05, "loss": 2.7091, "step": 27350 }, { "epoch": 0.8332883228326017, "grad_norm": 31.625, "learning_rate": 3.281220317533159e-05, "loss": 2.7238, "step": 27360 }, { "epoch": 0.8335928872780813, "grad_norm": 31.25, "learning_rate": 3.276656493654651e-05, "loss": 2.737, "step": 27370 }, { "epoch": 0.8338974517235611, "grad_norm": 31.875, "learning_rate": 3.2721001920635506e-05, "loss": 2.6978, "step": 27380 }, { "epoch": 0.8342020161690409, "grad_norm": 32.0, "learning_rate": 3.26755141719755e-05, "loss": 2.7321, "step": 27390 }, { "epoch": 0.8345065806145207, "grad_norm": 32.25, "learning_rate": 3.263010173487009e-05, "loss": 2.748, "step": 27400 }, { "epoch": 0.8348111450600004, "grad_norm": 31.875, "learning_rate": 3.258476465354953e-05, "loss": 2.7334, "step": 27410 }, { "epoch": 0.8351157095054801, "grad_norm": 31.625, "learning_rate": 3.2539502972170635e-05, "loss": 2.6823, "step": 27420 }, { "epoch": 0.8354202739509599, "grad_norm": 32.25, "learning_rate": 3.249431673481688e-05, "loss": 2.6994, "step": 27430 }, { "epoch": 0.8357248383964396, "grad_norm": 31.875, "learning_rate": 3.244920598549814e-05, "loss": 2.7065, "step": 27440 }, { "epoch": 0.8360294028419194, "grad_norm": 31.5, "learning_rate": 3.24041707681509e-05, "loss": 2.6922, "step": 27450 }, { "epoch": 0.8363339672873992, "grad_norm": 32.25, "learning_rate": 3.235921112663797e-05, "loss": 2.7257, "step": 27460 }, { "epoch": 0.8366385317328789, "grad_norm": 32.5, "learning_rate": 3.2314327104748564e-05, "loss": 2.7631, "step": 27470 }, { "epoch": 0.8369430961783586, "grad_norm": 31.25, "learning_rate": 3.226951874619833e-05, "loss": 2.7078, "step": 27480 }, { "epoch": 0.8372476606238384, "grad_norm": 32.0, "learning_rate": 3.222478609462911e-05, "loss": 2.6934, "step": 27490 }, { "epoch": 0.8375522250693181, "grad_norm": 31.75, "learning_rate": 3.218012919360911e-05, "loss": 2.7074, "step": 27500 }, { "epoch": 0.8378567895147979, "grad_norm": 31.25, "learning_rate": 3.213554808663268e-05, "loss": 2.7207, "step": 27510 }, { "epoch": 0.8381613539602777, "grad_norm": 31.625, "learning_rate": 3.2091042817120375e-05, "loss": 2.7053, "step": 27520 }, { "epoch": 0.8384659184057575, "grad_norm": 32.75, "learning_rate": 3.2046613428418925e-05, "loss": 2.7464, "step": 27530 }, { "epoch": 0.8387704828512371, "grad_norm": 33.75, "learning_rate": 3.20022599638011e-05, "loss": 2.751, "step": 27540 }, { "epoch": 0.8390750472967169, "grad_norm": 31.375, "learning_rate": 3.195798246646572e-05, "loss": 2.767, "step": 27550 }, { "epoch": 0.8393796117421967, "grad_norm": 33.25, "learning_rate": 3.191378097953769e-05, "loss": 2.7444, "step": 27560 }, { "epoch": 0.8396841761876764, "grad_norm": 31.75, "learning_rate": 3.186965554606779e-05, "loss": 2.7074, "step": 27570 }, { "epoch": 0.8399887406331562, "grad_norm": 32.0, "learning_rate": 3.1825606209032795e-05, "loss": 2.7191, "step": 27580 }, { "epoch": 0.840293305078636, "grad_norm": 31.25, "learning_rate": 3.178163301133533e-05, "loss": 2.709, "step": 27590 }, { "epoch": 0.8405978695241156, "grad_norm": 30.875, "learning_rate": 3.173773599580386e-05, "loss": 2.7389, "step": 27600 }, { "epoch": 0.8409024339695954, "grad_norm": 32.75, "learning_rate": 3.169391520519268e-05, "loss": 2.7138, "step": 27610 }, { "epoch": 0.8412069984150752, "grad_norm": 31.75, "learning_rate": 3.16501706821818e-05, "loss": 2.7183, "step": 27620 }, { "epoch": 0.841511562860555, "grad_norm": 31.5, "learning_rate": 3.1606502469377e-05, "loss": 2.694, "step": 27630 }, { "epoch": 0.8418161273060347, "grad_norm": 32.25, "learning_rate": 3.156291060930972e-05, "loss": 2.7089, "step": 27640 }, { "epoch": 0.8421206917515144, "grad_norm": 31.625, "learning_rate": 3.151939514443698e-05, "loss": 2.7353, "step": 27650 }, { "epoch": 0.8424252561969942, "grad_norm": 31.875, "learning_rate": 3.147595611714148e-05, "loss": 2.7048, "step": 27660 }, { "epoch": 0.8427298206424739, "grad_norm": 31.875, "learning_rate": 3.1432593569731405e-05, "loss": 2.7143, "step": 27670 }, { "epoch": 0.8430343850879537, "grad_norm": 32.75, "learning_rate": 3.13893075444405e-05, "loss": 2.7415, "step": 27680 }, { "epoch": 0.8433389495334335, "grad_norm": 31.5, "learning_rate": 3.134609808342796e-05, "loss": 2.6869, "step": 27690 }, { "epoch": 0.8436435139789132, "grad_norm": 32.5, "learning_rate": 3.130296522877838e-05, "loss": 2.7211, "step": 27700 }, { "epoch": 0.8439480784243929, "grad_norm": 32.0, "learning_rate": 3.125990902250178e-05, "loss": 2.7359, "step": 27710 }, { "epoch": 0.8442526428698727, "grad_norm": 30.5, "learning_rate": 3.1216929506533504e-05, "loss": 2.6997, "step": 27720 }, { "epoch": 0.8445572073153524, "grad_norm": 32.0, "learning_rate": 3.117402672273425e-05, "loss": 2.7107, "step": 27730 }, { "epoch": 0.8448617717608322, "grad_norm": 32.25, "learning_rate": 3.1131200712889916e-05, "loss": 2.7353, "step": 27740 }, { "epoch": 0.845166336206312, "grad_norm": 32.75, "learning_rate": 3.1088451518711645e-05, "loss": 2.6953, "step": 27750 }, { "epoch": 0.8454709006517918, "grad_norm": 32.0, "learning_rate": 3.10457791818358e-05, "loss": 2.6954, "step": 27760 }, { "epoch": 0.8457754650972714, "grad_norm": 32.0, "learning_rate": 3.100318374382384e-05, "loss": 2.733, "step": 27770 }, { "epoch": 0.8460800295427512, "grad_norm": 31.5, "learning_rate": 3.096066524616237e-05, "loss": 2.7219, "step": 27780 }, { "epoch": 0.846384593988231, "grad_norm": 33.0, "learning_rate": 3.0918223730263026e-05, "loss": 2.7432, "step": 27790 }, { "epoch": 0.8466891584337107, "grad_norm": 30.875, "learning_rate": 3.087585923746244e-05, "loss": 2.7066, "step": 27800 }, { "epoch": 0.8469937228791905, "grad_norm": 31.5, "learning_rate": 3.083357180902233e-05, "loss": 2.7105, "step": 27810 }, { "epoch": 0.8472982873246703, "grad_norm": 33.25, "learning_rate": 3.079136148612921e-05, "loss": 2.7155, "step": 27820 }, { "epoch": 0.8476028517701499, "grad_norm": 33.0, "learning_rate": 3.074922830989465e-05, "loss": 2.7471, "step": 27830 }, { "epoch": 0.8479074162156297, "grad_norm": 32.25, "learning_rate": 3.070717232135495e-05, "loss": 2.7198, "step": 27840 }, { "epoch": 0.8482119806611095, "grad_norm": 31.5, "learning_rate": 3.0665193561471286e-05, "loss": 2.7272, "step": 27850 }, { "epoch": 0.8485165451065892, "grad_norm": 31.75, "learning_rate": 3.062329207112965e-05, "loss": 2.7266, "step": 27860 }, { "epoch": 0.848821109552069, "grad_norm": 31.875, "learning_rate": 3.058146789114072e-05, "loss": 2.7419, "step": 27870 }, { "epoch": 0.8491256739975487, "grad_norm": 31.75, "learning_rate": 3.0539721062239906e-05, "loss": 2.6864, "step": 27880 }, { "epoch": 0.8494302384430285, "grad_norm": 32.5, "learning_rate": 3.0498051625087263e-05, "loss": 2.7444, "step": 27890 }, { "epoch": 0.8497348028885082, "grad_norm": 30.375, "learning_rate": 3.0456459620267486e-05, "loss": 2.6934, "step": 27900 }, { "epoch": 0.850039367333988, "grad_norm": 32.25, "learning_rate": 3.0414945088289838e-05, "loss": 2.7083, "step": 27910 }, { "epoch": 0.8503439317794678, "grad_norm": 32.5, "learning_rate": 3.0373508069588153e-05, "loss": 2.7088, "step": 27920 }, { "epoch": 0.8506484962249475, "grad_norm": 31.375, "learning_rate": 3.033214860452072e-05, "loss": 2.697, "step": 27930 }, { "epoch": 0.8509530606704272, "grad_norm": 32.25, "learning_rate": 3.0290866733370354e-05, "loss": 2.7212, "step": 27940 }, { "epoch": 0.851257625115907, "grad_norm": 30.625, "learning_rate": 3.0249662496344233e-05, "loss": 2.7143, "step": 27950 }, { "epoch": 0.8515621895613867, "grad_norm": 31.75, "learning_rate": 3.0208535933573988e-05, "loss": 2.7298, "step": 27960 }, { "epoch": 0.8518667540068665, "grad_norm": 31.125, "learning_rate": 3.0167487085115566e-05, "loss": 2.7195, "step": 27970 }, { "epoch": 0.8521713184523463, "grad_norm": 32.75, "learning_rate": 3.0126515990949172e-05, "loss": 2.706, "step": 27980 }, { "epoch": 0.852475882897826, "grad_norm": 32.5, "learning_rate": 3.0085622690979387e-05, "loss": 2.7182, "step": 27990 }, { "epoch": 0.8527804473433057, "grad_norm": 32.5, "learning_rate": 3.0044807225034922e-05, "loss": 2.7307, "step": 28000 }, { "epoch": 0.8530850117887855, "grad_norm": 31.375, "learning_rate": 3.0004069632868766e-05, "loss": 2.7053, "step": 28010 }, { "epoch": 0.8533895762342653, "grad_norm": 32.25, "learning_rate": 2.9963409954157983e-05, "loss": 2.7369, "step": 28020 }, { "epoch": 0.853694140679745, "grad_norm": 33.0, "learning_rate": 2.9922828228503808e-05, "loss": 2.7315, "step": 28030 }, { "epoch": 0.8539987051252248, "grad_norm": 31.125, "learning_rate": 2.988232449543153e-05, "loss": 2.7141, "step": 28040 }, { "epoch": 0.8543032695707046, "grad_norm": 31.25, "learning_rate": 2.9841898794390477e-05, "loss": 2.6759, "step": 28050 }, { "epoch": 0.8546078340161842, "grad_norm": 31.75, "learning_rate": 2.980155116475396e-05, "loss": 2.7075, "step": 28060 }, { "epoch": 0.854912398461664, "grad_norm": 34.5, "learning_rate": 2.9761281645819295e-05, "loss": 2.7137, "step": 28070 }, { "epoch": 0.8552169629071438, "grad_norm": 31.125, "learning_rate": 2.972109027680768e-05, "loss": 2.748, "step": 28080 }, { "epoch": 0.8555215273526235, "grad_norm": 31.75, "learning_rate": 2.9680977096864248e-05, "loss": 2.7099, "step": 28090 }, { "epoch": 0.8558260917981033, "grad_norm": 32.75, "learning_rate": 2.9640942145057866e-05, "loss": 2.7069, "step": 28100 }, { "epoch": 0.856130656243583, "grad_norm": 33.5, "learning_rate": 2.9600985460381352e-05, "loss": 2.6965, "step": 28110 }, { "epoch": 0.8564352206890627, "grad_norm": 31.75, "learning_rate": 2.9561107081751215e-05, "loss": 2.6974, "step": 28120 }, { "epoch": 0.8567397851345425, "grad_norm": 29.875, "learning_rate": 2.952130704800767e-05, "loss": 2.6938, "step": 28130 }, { "epoch": 0.8570443495800223, "grad_norm": 32.5, "learning_rate": 2.9481585397914718e-05, "loss": 2.7379, "step": 28140 }, { "epoch": 0.8573489140255021, "grad_norm": 31.125, "learning_rate": 2.944194217015992e-05, "loss": 2.7033, "step": 28150 }, { "epoch": 0.8576534784709818, "grad_norm": 32.25, "learning_rate": 2.940237740335454e-05, "loss": 2.7164, "step": 28160 }, { "epoch": 0.8579580429164615, "grad_norm": 30.875, "learning_rate": 2.9362891136033378e-05, "loss": 2.7031, "step": 28170 }, { "epoch": 0.8582626073619413, "grad_norm": 31.75, "learning_rate": 2.9323483406654746e-05, "loss": 2.7328, "step": 28180 }, { "epoch": 0.858567171807421, "grad_norm": 33.25, "learning_rate": 2.9284154253600554e-05, "loss": 2.7384, "step": 28190 }, { "epoch": 0.8588717362529008, "grad_norm": 31.75, "learning_rate": 2.9244903715176087e-05, "loss": 2.6839, "step": 28200 }, { "epoch": 0.8591763006983806, "grad_norm": 32.25, "learning_rate": 2.920573182961013e-05, "loss": 2.7255, "step": 28210 }, { "epoch": 0.8594808651438604, "grad_norm": 30.5, "learning_rate": 2.9166638635054828e-05, "loss": 2.7046, "step": 28220 }, { "epoch": 0.85978542958934, "grad_norm": 30.875, "learning_rate": 2.912762416958567e-05, "loss": 2.7177, "step": 28230 }, { "epoch": 0.8600899940348198, "grad_norm": 30.5, "learning_rate": 2.908868847120152e-05, "loss": 2.7213, "step": 28240 }, { "epoch": 0.8603945584802996, "grad_norm": 31.125, "learning_rate": 2.9049831577824483e-05, "loss": 2.7053, "step": 28250 }, { "epoch": 0.8606991229257793, "grad_norm": 31.75, "learning_rate": 2.9011053527299896e-05, "loss": 2.711, "step": 28260 }, { "epoch": 0.8610036873712591, "grad_norm": 31.0, "learning_rate": 2.897235435739635e-05, "loss": 2.7153, "step": 28270 }, { "epoch": 0.8613082518167389, "grad_norm": 32.0, "learning_rate": 2.893373410580559e-05, "loss": 2.7507, "step": 28280 }, { "epoch": 0.8616128162622185, "grad_norm": 32.5, "learning_rate": 2.8895192810142496e-05, "loss": 2.6879, "step": 28290 }, { "epoch": 0.8619173807076983, "grad_norm": 32.5, "learning_rate": 2.885673050794504e-05, "loss": 2.7367, "step": 28300 }, { "epoch": 0.8622219451531781, "grad_norm": 33.25, "learning_rate": 2.8818347236674253e-05, "loss": 2.7127, "step": 28310 }, { "epoch": 0.8625265095986578, "grad_norm": 31.375, "learning_rate": 2.8780043033714232e-05, "loss": 2.7159, "step": 28320 }, { "epoch": 0.8628310740441376, "grad_norm": 32.5, "learning_rate": 2.8741817936371985e-05, "loss": 2.6728, "step": 28330 }, { "epoch": 0.8631356384896173, "grad_norm": 31.5, "learning_rate": 2.870367198187759e-05, "loss": 2.7225, "step": 28340 }, { "epoch": 0.863440202935097, "grad_norm": 32.25, "learning_rate": 2.8665605207383934e-05, "loss": 2.7307, "step": 28350 }, { "epoch": 0.8637447673805768, "grad_norm": 32.75, "learning_rate": 2.862761764996682e-05, "loss": 2.7066, "step": 28360 }, { "epoch": 0.8640493318260566, "grad_norm": 31.75, "learning_rate": 2.8589709346624937e-05, "loss": 2.7489, "step": 28370 }, { "epoch": 0.8643538962715364, "grad_norm": 31.375, "learning_rate": 2.855188033427971e-05, "loss": 2.7184, "step": 28380 }, { "epoch": 0.8646584607170161, "grad_norm": 32.5, "learning_rate": 2.851413064977541e-05, "loss": 2.7221, "step": 28390 }, { "epoch": 0.8649630251624958, "grad_norm": 33.25, "learning_rate": 2.8476460329879012e-05, "loss": 2.6989, "step": 28400 }, { "epoch": 0.8652675896079756, "grad_norm": 32.0, "learning_rate": 2.8438869411280156e-05, "loss": 2.7156, "step": 28410 }, { "epoch": 0.8655721540534553, "grad_norm": 31.0, "learning_rate": 2.840135793059122e-05, "loss": 2.7176, "step": 28420 }, { "epoch": 0.8658767184989351, "grad_norm": 31.875, "learning_rate": 2.8363925924347167e-05, "loss": 2.7239, "step": 28430 }, { "epoch": 0.8661812829444149, "grad_norm": 32.75, "learning_rate": 2.8326573429005552e-05, "loss": 2.7395, "step": 28440 }, { "epoch": 0.8664858473898946, "grad_norm": 31.875, "learning_rate": 2.8289300480946514e-05, "loss": 2.7062, "step": 28450 }, { "epoch": 0.8667904118353743, "grad_norm": 32.25, "learning_rate": 2.825210711647268e-05, "loss": 2.7266, "step": 28460 }, { "epoch": 0.8670949762808541, "grad_norm": 34.25, "learning_rate": 2.821499337180923e-05, "loss": 2.7067, "step": 28470 }, { "epoch": 0.8673995407263339, "grad_norm": 32.5, "learning_rate": 2.8177959283103715e-05, "loss": 2.7169, "step": 28480 }, { "epoch": 0.8677041051718136, "grad_norm": 31.5, "learning_rate": 2.8141004886426164e-05, "loss": 2.6836, "step": 28490 }, { "epoch": 0.8680086696172934, "grad_norm": 32.25, "learning_rate": 2.8104130217768955e-05, "loss": 2.6861, "step": 28500 }, { "epoch": 0.8683132340627732, "grad_norm": 32.0, "learning_rate": 2.806733531304681e-05, "loss": 2.7014, "step": 28510 }, { "epoch": 0.8686177985082528, "grad_norm": 31.125, "learning_rate": 2.803062020809682e-05, "loss": 2.713, "step": 28520 }, { "epoch": 0.8689223629537326, "grad_norm": 32.75, "learning_rate": 2.7993984938678263e-05, "loss": 2.7198, "step": 28530 }, { "epoch": 0.8692269273992124, "grad_norm": 32.0, "learning_rate": 2.7957429540472764e-05, "loss": 2.745, "step": 28540 }, { "epoch": 0.8695314918446921, "grad_norm": 30.875, "learning_rate": 2.7920954049084075e-05, "loss": 2.7182, "step": 28550 }, { "epoch": 0.8698360562901719, "grad_norm": 30.875, "learning_rate": 2.7884558500038134e-05, "loss": 2.7387, "step": 28560 }, { "epoch": 0.8701406207356516, "grad_norm": 31.5, "learning_rate": 2.784824292878305e-05, "loss": 2.7106, "step": 28570 }, { "epoch": 0.8704451851811313, "grad_norm": 31.75, "learning_rate": 2.7812007370689004e-05, "loss": 2.728, "step": 28580 }, { "epoch": 0.8707497496266111, "grad_norm": 32.25, "learning_rate": 2.777585186104828e-05, "loss": 2.714, "step": 28590 }, { "epoch": 0.8710543140720909, "grad_norm": 32.25, "learning_rate": 2.7739776435075176e-05, "loss": 2.7208, "step": 28600 }, { "epoch": 0.8713588785175707, "grad_norm": 31.625, "learning_rate": 2.7703781127905965e-05, "loss": 2.6805, "step": 28610 }, { "epoch": 0.8716634429630504, "grad_norm": 33.0, "learning_rate": 2.766786597459895e-05, "loss": 2.737, "step": 28620 }, { "epoch": 0.8719680074085301, "grad_norm": 32.5, "learning_rate": 2.7632031010134313e-05, "loss": 2.7313, "step": 28630 }, { "epoch": 0.8722725718540099, "grad_norm": 31.875, "learning_rate": 2.759627626941415e-05, "loss": 2.7097, "step": 28640 }, { "epoch": 0.8725771362994896, "grad_norm": 32.5, "learning_rate": 2.756060178726244e-05, "loss": 2.7045, "step": 28650 }, { "epoch": 0.8728817007449694, "grad_norm": 33.5, "learning_rate": 2.7525007598424953e-05, "loss": 2.7246, "step": 28660 }, { "epoch": 0.8731862651904492, "grad_norm": 30.75, "learning_rate": 2.7489493737569317e-05, "loss": 2.7226, "step": 28670 }, { "epoch": 0.873490829635929, "grad_norm": 31.0, "learning_rate": 2.7454060239284855e-05, "loss": 2.693, "step": 28680 }, { "epoch": 0.8737953940814086, "grad_norm": 32.25, "learning_rate": 2.7418707138082655e-05, "loss": 2.7152, "step": 28690 }, { "epoch": 0.8740999585268884, "grad_norm": 31.875, "learning_rate": 2.738343446839553e-05, "loss": 2.6886, "step": 28700 }, { "epoch": 0.8744045229723681, "grad_norm": 32.0, "learning_rate": 2.7348242264577868e-05, "loss": 2.7156, "step": 28710 }, { "epoch": 0.8747090874178479, "grad_norm": 31.625, "learning_rate": 2.7313130560905787e-05, "loss": 2.6712, "step": 28720 }, { "epoch": 0.8750136518633277, "grad_norm": 31.875, "learning_rate": 2.7278099391576938e-05, "loss": 2.7064, "step": 28730 }, { "epoch": 0.8753182163088075, "grad_norm": 32.0, "learning_rate": 2.7243148790710533e-05, "loss": 2.6979, "step": 28740 }, { "epoch": 0.8756227807542871, "grad_norm": 31.25, "learning_rate": 2.7208278792347357e-05, "loss": 2.6921, "step": 28750 }, { "epoch": 0.8759273451997669, "grad_norm": 31.875, "learning_rate": 2.7173489430449638e-05, "loss": 2.73, "step": 28760 }, { "epoch": 0.8762319096452467, "grad_norm": 31.875, "learning_rate": 2.713878073890112e-05, "loss": 2.7382, "step": 28770 }, { "epoch": 0.8765364740907264, "grad_norm": 32.5, "learning_rate": 2.7104152751506932e-05, "loss": 2.7167, "step": 28780 }, { "epoch": 0.8768410385362062, "grad_norm": 30.5, "learning_rate": 2.7069605501993605e-05, "loss": 2.7276, "step": 28790 }, { "epoch": 0.8771456029816859, "grad_norm": 30.75, "learning_rate": 2.7035139024009072e-05, "loss": 2.714, "step": 28800 }, { "epoch": 0.8774501674271656, "grad_norm": 31.75, "learning_rate": 2.7000753351122555e-05, "loss": 2.7216, "step": 28810 }, { "epoch": 0.8777547318726454, "grad_norm": 35.0, "learning_rate": 2.6966448516824577e-05, "loss": 2.6911, "step": 28820 }, { "epoch": 0.8780592963181252, "grad_norm": 31.375, "learning_rate": 2.6932224554526973e-05, "loss": 2.7221, "step": 28830 }, { "epoch": 0.878363860763605, "grad_norm": 31.625, "learning_rate": 2.689808149756275e-05, "loss": 2.7194, "step": 28840 }, { "epoch": 0.8786684252090847, "grad_norm": 31.625, "learning_rate": 2.686401937918616e-05, "loss": 2.7415, "step": 28850 }, { "epoch": 0.8789729896545644, "grad_norm": 32.0, "learning_rate": 2.6830038232572614e-05, "loss": 2.7352, "step": 28860 }, { "epoch": 0.8792775541000442, "grad_norm": 33.0, "learning_rate": 2.679613809081861e-05, "loss": 2.7287, "step": 28870 }, { "epoch": 0.8795821185455239, "grad_norm": 32.0, "learning_rate": 2.676231898694186e-05, "loss": 2.7349, "step": 28880 }, { "epoch": 0.8798866829910037, "grad_norm": 31.0, "learning_rate": 2.6728580953881016e-05, "loss": 2.7121, "step": 28890 }, { "epoch": 0.8801912474364835, "grad_norm": 30.875, "learning_rate": 2.669492402449587e-05, "loss": 2.7181, "step": 28900 }, { "epoch": 0.8804958118819632, "grad_norm": 32.25, "learning_rate": 2.666134823156717e-05, "loss": 2.713, "step": 28910 }, { "epoch": 0.8808003763274429, "grad_norm": 30.75, "learning_rate": 2.662785360779667e-05, "loss": 2.7269, "step": 28920 }, { "epoch": 0.8811049407729227, "grad_norm": 31.125, "learning_rate": 2.659444018580704e-05, "loss": 2.7223, "step": 28930 }, { "epoch": 0.8814095052184024, "grad_norm": 32.25, "learning_rate": 2.656110799814187e-05, "loss": 2.7118, "step": 28940 }, { "epoch": 0.8817140696638822, "grad_norm": 35.0, "learning_rate": 2.6527857077265654e-05, "loss": 2.69, "step": 28950 }, { "epoch": 0.882018634109362, "grad_norm": 33.0, "learning_rate": 2.6494687455563715e-05, "loss": 2.6913, "step": 28960 }, { "epoch": 0.8823231985548418, "grad_norm": 32.25, "learning_rate": 2.6461599165342164e-05, "loss": 2.7095, "step": 28970 }, { "epoch": 0.8826277630003214, "grad_norm": 33.75, "learning_rate": 2.6428592238827964e-05, "loss": 2.7307, "step": 28980 }, { "epoch": 0.8829323274458012, "grad_norm": 31.625, "learning_rate": 2.6395666708168776e-05, "loss": 2.7657, "step": 28990 }, { "epoch": 0.883236891891281, "grad_norm": 31.25, "learning_rate": 2.6362822605433023e-05, "loss": 2.7072, "step": 29000 }, { "epoch": 0.8835414563367607, "grad_norm": 33.0, "learning_rate": 2.633005996260979e-05, "loss": 2.6855, "step": 29010 }, { "epoch": 0.8838460207822405, "grad_norm": 31.125, "learning_rate": 2.629737881160882e-05, "loss": 2.7153, "step": 29020 }, { "epoch": 0.8841505852277202, "grad_norm": 31.5, "learning_rate": 2.6264779184260524e-05, "loss": 2.6719, "step": 29030 }, { "epoch": 0.8844551496731999, "grad_norm": 32.5, "learning_rate": 2.623226111231586e-05, "loss": 2.7044, "step": 29040 }, { "epoch": 0.8847597141186797, "grad_norm": 32.25, "learning_rate": 2.6199824627446424e-05, "loss": 2.7016, "step": 29050 }, { "epoch": 0.8850642785641595, "grad_norm": 30.875, "learning_rate": 2.6167469761244266e-05, "loss": 2.7107, "step": 29060 }, { "epoch": 0.8853688430096393, "grad_norm": 32.0, "learning_rate": 2.6135196545221995e-05, "loss": 2.7138, "step": 29070 }, { "epoch": 0.885673407455119, "grad_norm": 31.125, "learning_rate": 2.6103005010812702e-05, "loss": 2.6926, "step": 29080 }, { "epoch": 0.8859779719005987, "grad_norm": 32.0, "learning_rate": 2.6070895189369853e-05, "loss": 2.7253, "step": 29090 }, { "epoch": 0.8862825363460785, "grad_norm": 31.5, "learning_rate": 2.6038867112167432e-05, "loss": 2.6886, "step": 29100 }, { "epoch": 0.8865871007915582, "grad_norm": 32.75, "learning_rate": 2.6006920810399733e-05, "loss": 2.7107, "step": 29110 }, { "epoch": 0.886891665237038, "grad_norm": 32.25, "learning_rate": 2.5975056315181394e-05, "loss": 2.741, "step": 29120 }, { "epoch": 0.8871962296825178, "grad_norm": 31.5, "learning_rate": 2.594327365754746e-05, "loss": 2.7116, "step": 29130 }, { "epoch": 0.8875007941279975, "grad_norm": 31.875, "learning_rate": 2.5911572868453182e-05, "loss": 2.7124, "step": 29140 }, { "epoch": 0.8878053585734772, "grad_norm": 31.75, "learning_rate": 2.5879953978774097e-05, "loss": 2.6949, "step": 29150 }, { "epoch": 0.888109923018957, "grad_norm": 31.375, "learning_rate": 2.5848417019306003e-05, "loss": 2.7394, "step": 29160 }, { "epoch": 0.8884144874644367, "grad_norm": 32.0, "learning_rate": 2.5816962020764863e-05, "loss": 2.7267, "step": 29170 }, { "epoch": 0.8887190519099165, "grad_norm": 31.75, "learning_rate": 2.5785589013786853e-05, "loss": 2.6921, "step": 29180 }, { "epoch": 0.8890236163553963, "grad_norm": 31.375, "learning_rate": 2.5754298028928275e-05, "loss": 2.7021, "step": 29190 }, { "epoch": 0.8893281808008761, "grad_norm": 31.625, "learning_rate": 2.572308909666549e-05, "loss": 2.7364, "step": 29200 }, { "epoch": 0.8896327452463557, "grad_norm": 34.75, "learning_rate": 2.569196224739505e-05, "loss": 2.6861, "step": 29210 }, { "epoch": 0.8899373096918355, "grad_norm": 33.0, "learning_rate": 2.5660917511433452e-05, "loss": 2.7486, "step": 29220 }, { "epoch": 0.8902418741373153, "grad_norm": 32.25, "learning_rate": 2.562995491901732e-05, "loss": 2.7401, "step": 29230 }, { "epoch": 0.890546438582795, "grad_norm": 32.25, "learning_rate": 2.5599074500303177e-05, "loss": 2.7116, "step": 29240 }, { "epoch": 0.8908510030282748, "grad_norm": 31.625, "learning_rate": 2.5568276285367553e-05, "loss": 2.7019, "step": 29250 }, { "epoch": 0.8911555674737545, "grad_norm": 33.75, "learning_rate": 2.5537560304206958e-05, "loss": 2.7257, "step": 29260 }, { "epoch": 0.8914601319192342, "grad_norm": 32.5, "learning_rate": 2.5506926586737716e-05, "loss": 2.6931, "step": 29270 }, { "epoch": 0.891764696364714, "grad_norm": 35.0, "learning_rate": 2.5476375162796135e-05, "loss": 2.74, "step": 29280 }, { "epoch": 0.8920692608101938, "grad_norm": 32.0, "learning_rate": 2.544590606213826e-05, "loss": 2.7492, "step": 29290 }, { "epoch": 0.8923738252556735, "grad_norm": 32.0, "learning_rate": 2.5415519314440046e-05, "loss": 2.7047, "step": 29300 }, { "epoch": 0.8926783897011533, "grad_norm": 32.0, "learning_rate": 2.5385214949297197e-05, "loss": 2.7322, "step": 29310 }, { "epoch": 0.892982954146633, "grad_norm": 31.25, "learning_rate": 2.535499299622518e-05, "loss": 2.7451, "step": 29320 }, { "epoch": 0.8932875185921128, "grad_norm": 31.375, "learning_rate": 2.5324853484659215e-05, "loss": 2.7059, "step": 29330 }, { "epoch": 0.8935920830375925, "grad_norm": 31.5, "learning_rate": 2.529479644395421e-05, "loss": 2.6886, "step": 29340 }, { "epoch": 0.8938966474830723, "grad_norm": 31.5, "learning_rate": 2.5264821903384738e-05, "loss": 2.684, "step": 29350 }, { "epoch": 0.8942012119285521, "grad_norm": 31.375, "learning_rate": 2.5234929892145055e-05, "loss": 2.7416, "step": 29360 }, { "epoch": 0.8945057763740318, "grad_norm": 31.625, "learning_rate": 2.5205120439348995e-05, "loss": 2.7186, "step": 29370 }, { "epoch": 0.8948103408195115, "grad_norm": 31.375, "learning_rate": 2.5175393574030027e-05, "loss": 2.7189, "step": 29380 }, { "epoch": 0.8951149052649913, "grad_norm": 32.25, "learning_rate": 2.514574932514115e-05, "loss": 2.6908, "step": 29390 }, { "epoch": 0.895419469710471, "grad_norm": 32.75, "learning_rate": 2.5116187721554885e-05, "loss": 2.682, "step": 29400 }, { "epoch": 0.8957240341559508, "grad_norm": 33.25, "learning_rate": 2.5086708792063318e-05, "loss": 2.7175, "step": 29410 }, { "epoch": 0.8960285986014306, "grad_norm": 31.5, "learning_rate": 2.505731256537795e-05, "loss": 2.7456, "step": 29420 }, { "epoch": 0.8963331630469104, "grad_norm": 32.0, "learning_rate": 2.5027999070129766e-05, "loss": 2.7333, "step": 29430 }, { "epoch": 0.89663772749239, "grad_norm": 32.25, "learning_rate": 2.499876833486916e-05, "loss": 2.7189, "step": 29440 }, { "epoch": 0.8969422919378698, "grad_norm": 30.625, "learning_rate": 2.496962038806592e-05, "loss": 2.6805, "step": 29450 }, { "epoch": 0.8972468563833496, "grad_norm": 32.5, "learning_rate": 2.494055525810922e-05, "loss": 2.7365, "step": 29460 }, { "epoch": 0.8975514208288293, "grad_norm": 33.0, "learning_rate": 2.4911572973307552e-05, "loss": 2.7257, "step": 29470 }, { "epoch": 0.8978559852743091, "grad_norm": 32.75, "learning_rate": 2.4882673561888726e-05, "loss": 2.7027, "step": 29480 }, { "epoch": 0.8981605497197888, "grad_norm": 31.875, "learning_rate": 2.485385705199984e-05, "loss": 2.7085, "step": 29490 }, { "epoch": 0.8984651141652685, "grad_norm": 32.25, "learning_rate": 2.4825123471707227e-05, "loss": 2.7468, "step": 29500 }, { "epoch": 0.8987696786107483, "grad_norm": 31.0, "learning_rate": 2.4796472848996494e-05, "loss": 2.7045, "step": 29510 }, { "epoch": 0.8990742430562281, "grad_norm": 31.25, "learning_rate": 2.4767905211772417e-05, "loss": 2.7204, "step": 29520 }, { "epoch": 0.8993788075017078, "grad_norm": 31.875, "learning_rate": 2.4739420587858935e-05, "loss": 2.7215, "step": 29530 }, { "epoch": 0.8996833719471876, "grad_norm": 36.75, "learning_rate": 2.4711019004999175e-05, "loss": 2.6818, "step": 29540 }, { "epoch": 0.8999879363926673, "grad_norm": 31.75, "learning_rate": 2.468270049085534e-05, "loss": 2.731, "step": 29550 }, { "epoch": 0.900292500838147, "grad_norm": 31.25, "learning_rate": 2.4654465073008772e-05, "loss": 2.7068, "step": 29560 }, { "epoch": 0.9005970652836268, "grad_norm": 32.0, "learning_rate": 2.4626312778959845e-05, "loss": 2.7077, "step": 29570 }, { "epoch": 0.9009016297291066, "grad_norm": 33.25, "learning_rate": 2.459824363612797e-05, "loss": 2.6816, "step": 29580 }, { "epoch": 0.9012061941745864, "grad_norm": 31.375, "learning_rate": 2.4570257671851623e-05, "loss": 2.7086, "step": 29590 }, { "epoch": 0.9015107586200661, "grad_norm": 32.0, "learning_rate": 2.45423549133882e-05, "loss": 2.7055, "step": 29600 }, { "epoch": 0.9018153230655458, "grad_norm": 32.25, "learning_rate": 2.45145353879141e-05, "loss": 2.724, "step": 29610 }, { "epoch": 0.9021198875110256, "grad_norm": 30.75, "learning_rate": 2.4486799122524655e-05, "loss": 2.6969, "step": 29620 }, { "epoch": 0.9024244519565053, "grad_norm": 31.375, "learning_rate": 2.4459146144234065e-05, "loss": 2.7223, "step": 29630 }, { "epoch": 0.9027290164019851, "grad_norm": 32.0, "learning_rate": 2.4431576479975447e-05, "loss": 2.6943, "step": 29640 }, { "epoch": 0.9030335808474649, "grad_norm": 32.25, "learning_rate": 2.4404090156600774e-05, "loss": 2.717, "step": 29650 }, { "epoch": 0.9033381452929446, "grad_norm": 32.5, "learning_rate": 2.4376687200880844e-05, "loss": 2.7043, "step": 29660 }, { "epoch": 0.9036427097384243, "grad_norm": 31.25, "learning_rate": 2.4349367639505244e-05, "loss": 2.7126, "step": 29670 }, { "epoch": 0.9039472741839041, "grad_norm": 32.5, "learning_rate": 2.4322131499082343e-05, "loss": 2.6877, "step": 29680 }, { "epoch": 0.9042518386293839, "grad_norm": 32.75, "learning_rate": 2.4294978806139264e-05, "loss": 2.7179, "step": 29690 }, { "epoch": 0.9045564030748636, "grad_norm": 35.5, "learning_rate": 2.426790958712184e-05, "loss": 2.7142, "step": 29700 }, { "epoch": 0.9048609675203434, "grad_norm": 31.375, "learning_rate": 2.4240923868394637e-05, "loss": 2.7307, "step": 29710 }, { "epoch": 0.9051655319658231, "grad_norm": 32.25, "learning_rate": 2.4214021676240872e-05, "loss": 2.6992, "step": 29720 }, { "epoch": 0.9054700964113028, "grad_norm": 33.75, "learning_rate": 2.418720303686241e-05, "loss": 2.733, "step": 29730 }, { "epoch": 0.9057746608567826, "grad_norm": 31.25, "learning_rate": 2.4160467976379737e-05, "loss": 2.7157, "step": 29740 }, { "epoch": 0.9060792253022624, "grad_norm": 31.625, "learning_rate": 2.413381652083193e-05, "loss": 2.7429, "step": 29750 }, { "epoch": 0.9063837897477421, "grad_norm": 32.5, "learning_rate": 2.410724869617667e-05, "loss": 2.7281, "step": 29760 }, { "epoch": 0.9066883541932219, "grad_norm": 31.25, "learning_rate": 2.408076452829015e-05, "loss": 2.7059, "step": 29770 }, { "epoch": 0.9069929186387016, "grad_norm": 32.75, "learning_rate": 2.4054364042967084e-05, "loss": 2.689, "step": 29780 }, { "epoch": 0.9072974830841813, "grad_norm": 32.5, "learning_rate": 2.4028047265920718e-05, "loss": 2.7278, "step": 29790 }, { "epoch": 0.9076020475296611, "grad_norm": 32.0, "learning_rate": 2.400181422278272e-05, "loss": 2.7178, "step": 29800 }, { "epoch": 0.9079066119751409, "grad_norm": 32.75, "learning_rate": 2.3975664939103243e-05, "loss": 2.7104, "step": 29810 }, { "epoch": 0.9082111764206207, "grad_norm": 32.75, "learning_rate": 2.394959944035084e-05, "loss": 2.7228, "step": 29820 }, { "epoch": 0.9085157408661004, "grad_norm": 31.625, "learning_rate": 2.3923617751912457e-05, "loss": 2.7208, "step": 29830 }, { "epoch": 0.9088203053115801, "grad_norm": 31.125, "learning_rate": 2.389771989909343e-05, "loss": 2.7203, "step": 29840 }, { "epoch": 0.9091248697570599, "grad_norm": 30.875, "learning_rate": 2.387190590711744e-05, "loss": 2.7123, "step": 29850 }, { "epoch": 0.9094294342025396, "grad_norm": 31.25, "learning_rate": 2.3846175801126462e-05, "loss": 2.7303, "step": 29860 }, { "epoch": 0.9097339986480194, "grad_norm": 33.0, "learning_rate": 2.382052960618082e-05, "loss": 2.7444, "step": 29870 }, { "epoch": 0.9100385630934992, "grad_norm": 31.875, "learning_rate": 2.3794967347259034e-05, "loss": 2.7139, "step": 29880 }, { "epoch": 0.910343127538979, "grad_norm": 33.5, "learning_rate": 2.3769489049257964e-05, "loss": 2.7473, "step": 29890 }, { "epoch": 0.9106476919844586, "grad_norm": 34.5, "learning_rate": 2.374409473699264e-05, "loss": 2.7155, "step": 29900 }, { "epoch": 0.9109522564299384, "grad_norm": 30.75, "learning_rate": 2.371878443519628e-05, "loss": 2.7096, "step": 29910 }, { "epoch": 0.9112568208754181, "grad_norm": 31.375, "learning_rate": 2.369355816852032e-05, "loss": 2.72, "step": 29920 }, { "epoch": 0.9115613853208979, "grad_norm": 31.375, "learning_rate": 2.366841596153433e-05, "loss": 2.6887, "step": 29930 }, { "epoch": 0.9118659497663777, "grad_norm": 32.0, "learning_rate": 2.3643357838726007e-05, "loss": 2.7239, "step": 29940 }, { "epoch": 0.9121705142118574, "grad_norm": 32.5, "learning_rate": 2.3618383824501157e-05, "loss": 2.7101, "step": 29950 }, { "epoch": 0.9124750786573371, "grad_norm": 32.5, "learning_rate": 2.3593493943183646e-05, "loss": 2.7097, "step": 29960 }, { "epoch": 0.9127796431028169, "grad_norm": 33.25, "learning_rate": 2.3568688219015433e-05, "loss": 2.7072, "step": 29970 }, { "epoch": 0.9130842075482967, "grad_norm": 32.0, "learning_rate": 2.3543966676156492e-05, "loss": 2.7207, "step": 29980 }, { "epoch": 0.9133887719937764, "grad_norm": 32.25, "learning_rate": 2.351932933868481e-05, "loss": 2.7322, "step": 29990 }, { "epoch": 0.9136933364392562, "grad_norm": 33.5, "learning_rate": 2.349477623059636e-05, "loss": 2.6858, "step": 30000 }, { "epoch": 0.9139979008847359, "grad_norm": 33.0, "learning_rate": 2.347030737580505e-05, "loss": 2.7017, "step": 30010 }, { "epoch": 0.9143024653302156, "grad_norm": 33.75, "learning_rate": 2.34459227981428e-05, "loss": 2.7548, "step": 30020 }, { "epoch": 0.9146070297756954, "grad_norm": 31.75, "learning_rate": 2.3421622521359373e-05, "loss": 2.7264, "step": 30030 }, { "epoch": 0.9149115942211752, "grad_norm": 31.25, "learning_rate": 2.3397406569122444e-05, "loss": 2.6927, "step": 30040 }, { "epoch": 0.915216158666655, "grad_norm": 32.5, "learning_rate": 2.3373274965017604e-05, "loss": 2.6871, "step": 30050 }, { "epoch": 0.9155207231121347, "grad_norm": 33.5, "learning_rate": 2.334922773254822e-05, "loss": 2.6895, "step": 30060 }, { "epoch": 0.9158252875576144, "grad_norm": 31.625, "learning_rate": 2.332526489513557e-05, "loss": 2.7133, "step": 30070 }, { "epoch": 0.9161298520030942, "grad_norm": 30.875, "learning_rate": 2.330138647611862e-05, "loss": 2.682, "step": 30080 }, { "epoch": 0.9164344164485739, "grad_norm": 31.75, "learning_rate": 2.3277592498754236e-05, "loss": 2.7377, "step": 30090 }, { "epoch": 0.9167389808940537, "grad_norm": 31.75, "learning_rate": 2.3253882986216963e-05, "loss": 2.7149, "step": 30100 }, { "epoch": 0.9170435453395335, "grad_norm": 31.75, "learning_rate": 2.3230257961599082e-05, "loss": 2.7297, "step": 30110 }, { "epoch": 0.9173481097850132, "grad_norm": 33.75, "learning_rate": 2.3206717447910632e-05, "loss": 2.721, "step": 30120 }, { "epoch": 0.9176526742304929, "grad_norm": 31.75, "learning_rate": 2.31832614680793e-05, "loss": 2.7341, "step": 30130 }, { "epoch": 0.9179572386759727, "grad_norm": 33.25, "learning_rate": 2.3159890044950467e-05, "loss": 2.7186, "step": 30140 }, { "epoch": 0.9182618031214524, "grad_norm": 33.25, "learning_rate": 2.3136603201287144e-05, "loss": 2.7064, "step": 30150 }, { "epoch": 0.9185663675669322, "grad_norm": 31.75, "learning_rate": 2.3113400959769937e-05, "loss": 2.7065, "step": 30160 }, { "epoch": 0.918870932012412, "grad_norm": 31.75, "learning_rate": 2.3090283342997107e-05, "loss": 2.6993, "step": 30170 }, { "epoch": 0.9191754964578916, "grad_norm": 30.5, "learning_rate": 2.306725037348447e-05, "loss": 2.7092, "step": 30180 }, { "epoch": 0.9194800609033714, "grad_norm": 31.125, "learning_rate": 2.304430207366538e-05, "loss": 2.725, "step": 30190 }, { "epoch": 0.9197846253488512, "grad_norm": 32.0, "learning_rate": 2.3021438465890754e-05, "loss": 2.7273, "step": 30200 }, { "epoch": 0.920089189794331, "grad_norm": 30.5, "learning_rate": 2.2998659572428994e-05, "loss": 2.72, "step": 30210 }, { "epoch": 0.9203937542398107, "grad_norm": 31.875, "learning_rate": 2.2975965415466018e-05, "loss": 2.704, "step": 30220 }, { "epoch": 0.9206983186852905, "grad_norm": 32.25, "learning_rate": 2.295335601710521e-05, "loss": 2.7206, "step": 30230 }, { "epoch": 0.9210028831307702, "grad_norm": 31.875, "learning_rate": 2.2930831399367363e-05, "loss": 2.73, "step": 30240 }, { "epoch": 0.9213074475762499, "grad_norm": 33.75, "learning_rate": 2.290839158419076e-05, "loss": 2.7404, "step": 30250 }, { "epoch": 0.9216120120217297, "grad_norm": 32.5, "learning_rate": 2.2886036593431048e-05, "loss": 2.7093, "step": 30260 }, { "epoch": 0.9219165764672095, "grad_norm": 31.625, "learning_rate": 2.286376644886127e-05, "loss": 2.7508, "step": 30270 }, { "epoch": 0.9222211409126893, "grad_norm": 31.0, "learning_rate": 2.2841581172171826e-05, "loss": 2.7198, "step": 30280 }, { "epoch": 0.922525705358169, "grad_norm": 31.875, "learning_rate": 2.281948078497044e-05, "loss": 2.7145, "step": 30290 }, { "epoch": 0.9228302698036487, "grad_norm": 32.75, "learning_rate": 2.2797465308782206e-05, "loss": 2.6984, "step": 30300 }, { "epoch": 0.9231348342491285, "grad_norm": 32.5, "learning_rate": 2.277553476504947e-05, "loss": 2.7318, "step": 30310 }, { "epoch": 0.9234393986946082, "grad_norm": 31.875, "learning_rate": 2.2753689175131884e-05, "loss": 2.7557, "step": 30320 }, { "epoch": 0.923743963140088, "grad_norm": 32.5, "learning_rate": 2.2731928560306343e-05, "loss": 2.7253, "step": 30330 }, { "epoch": 0.9240485275855678, "grad_norm": 32.0, "learning_rate": 2.2710252941766983e-05, "loss": 2.722, "step": 30340 }, { "epoch": 0.9243530920310475, "grad_norm": 32.0, "learning_rate": 2.268866234062516e-05, "loss": 2.7123, "step": 30350 }, { "epoch": 0.9246576564765272, "grad_norm": 31.625, "learning_rate": 2.2667156777909412e-05, "loss": 2.7077, "step": 30360 }, { "epoch": 0.924962220922007, "grad_norm": 32.25, "learning_rate": 2.264573627456548e-05, "loss": 2.7296, "step": 30370 }, { "epoch": 0.9252667853674867, "grad_norm": 29.875, "learning_rate": 2.2624400851456234e-05, "loss": 2.7063, "step": 30380 }, { "epoch": 0.9255713498129665, "grad_norm": 31.5, "learning_rate": 2.2603150529361684e-05, "loss": 2.7133, "step": 30390 }, { "epoch": 0.9258759142584463, "grad_norm": 32.5, "learning_rate": 2.258198532897897e-05, "loss": 2.733, "step": 30400 }, { "epoch": 0.926180478703926, "grad_norm": 31.875, "learning_rate": 2.2560905270922312e-05, "loss": 2.7626, "step": 30410 }, { "epoch": 0.9264850431494057, "grad_norm": 31.875, "learning_rate": 2.2539910375722993e-05, "loss": 2.7095, "step": 30420 }, { "epoch": 0.9267896075948855, "grad_norm": 32.5, "learning_rate": 2.251900066382938e-05, "loss": 2.6793, "step": 30430 }, { "epoch": 0.9270941720403653, "grad_norm": 31.375, "learning_rate": 2.2498176155606842e-05, "loss": 2.7055, "step": 30440 }, { "epoch": 0.927398736485845, "grad_norm": 32.25, "learning_rate": 2.2477436871337796e-05, "loss": 2.7462, "step": 30450 }, { "epoch": 0.9277033009313248, "grad_norm": 32.75, "learning_rate": 2.2456782831221633e-05, "loss": 2.6964, "step": 30460 }, { "epoch": 0.9280078653768045, "grad_norm": 33.0, "learning_rate": 2.24362140553747e-05, "loss": 2.765, "step": 30470 }, { "epoch": 0.9283124298222842, "grad_norm": 32.25, "learning_rate": 2.241573056383035e-05, "loss": 2.7526, "step": 30480 }, { "epoch": 0.928616994267764, "grad_norm": 31.25, "learning_rate": 2.2395332376538806e-05, "loss": 2.7535, "step": 30490 }, { "epoch": 0.9289215587132438, "grad_norm": 31.625, "learning_rate": 2.2375019513367296e-05, "loss": 2.7381, "step": 30500 }, { "epoch": 0.9292261231587235, "grad_norm": 31.75, "learning_rate": 2.235479199409984e-05, "loss": 2.7116, "step": 30510 }, { "epoch": 0.9295306876042033, "grad_norm": 33.0, "learning_rate": 2.2334649838437428e-05, "loss": 2.6917, "step": 30520 }, { "epoch": 0.929835252049683, "grad_norm": 32.75, "learning_rate": 2.2314593065997855e-05, "loss": 2.7236, "step": 30530 }, { "epoch": 0.9301398164951628, "grad_norm": 34.5, "learning_rate": 2.229462169631577e-05, "loss": 2.7342, "step": 30540 }, { "epoch": 0.9304443809406425, "grad_norm": 32.5, "learning_rate": 2.2274735748842643e-05, "loss": 2.7196, "step": 30550 }, { "epoch": 0.9307489453861223, "grad_norm": 31.125, "learning_rate": 2.2254935242946747e-05, "loss": 2.6969, "step": 30560 }, { "epoch": 0.9310535098316021, "grad_norm": 31.25, "learning_rate": 2.223522019791314e-05, "loss": 2.6904, "step": 30570 }, { "epoch": 0.9313580742770818, "grad_norm": 31.625, "learning_rate": 2.221559063294364e-05, "loss": 2.7447, "step": 30580 }, { "epoch": 0.9316626387225615, "grad_norm": 31.5, "learning_rate": 2.219604656715678e-05, "loss": 2.7363, "step": 30590 }, { "epoch": 0.9319672031680413, "grad_norm": 31.75, "learning_rate": 2.2176588019587902e-05, "loss": 2.6972, "step": 30600 }, { "epoch": 0.932271767613521, "grad_norm": 31.875, "learning_rate": 2.2157215009188965e-05, "loss": 2.7346, "step": 30610 }, { "epoch": 0.9325763320590008, "grad_norm": 32.0, "learning_rate": 2.213792755482867e-05, "loss": 2.735, "step": 30620 }, { "epoch": 0.9328808965044806, "grad_norm": 33.0, "learning_rate": 2.2118725675292376e-05, "loss": 2.7141, "step": 30630 }, { "epoch": 0.9331854609499604, "grad_norm": 32.25, "learning_rate": 2.209960938928208e-05, "loss": 2.7047, "step": 30640 }, { "epoch": 0.93349002539544, "grad_norm": 34.0, "learning_rate": 2.2080578715416448e-05, "loss": 2.6994, "step": 30650 }, { "epoch": 0.9337945898409198, "grad_norm": 31.75, "learning_rate": 2.2061633672230733e-05, "loss": 2.735, "step": 30660 }, { "epoch": 0.9340991542863996, "grad_norm": 31.25, "learning_rate": 2.2042774278176784e-05, "loss": 2.7095, "step": 30670 }, { "epoch": 0.9344037187318793, "grad_norm": 32.25, "learning_rate": 2.202400055162307e-05, "loss": 2.7164, "step": 30680 }, { "epoch": 0.9347082831773591, "grad_norm": 32.75, "learning_rate": 2.2005312510854563e-05, "loss": 2.6985, "step": 30690 }, { "epoch": 0.9350128476228388, "grad_norm": 33.25, "learning_rate": 2.198671017407284e-05, "loss": 2.7128, "step": 30700 }, { "epoch": 0.9353174120683185, "grad_norm": 31.75, "learning_rate": 2.1968193559395976e-05, "loss": 2.7119, "step": 30710 }, { "epoch": 0.9356219765137983, "grad_norm": 32.75, "learning_rate": 2.194976268485853e-05, "loss": 2.7269, "step": 30720 }, { "epoch": 0.9359265409592781, "grad_norm": 33.75, "learning_rate": 2.193141756841159e-05, "loss": 2.7143, "step": 30730 }, { "epoch": 0.9362311054047578, "grad_norm": 32.0, "learning_rate": 2.191315822792272e-05, "loss": 2.7193, "step": 30740 }, { "epoch": 0.9365356698502376, "grad_norm": 31.875, "learning_rate": 2.189498468117593e-05, "loss": 2.7029, "step": 30750 }, { "epoch": 0.9368402342957173, "grad_norm": 32.5, "learning_rate": 2.1876896945871646e-05, "loss": 2.7015, "step": 30760 }, { "epoch": 0.937144798741197, "grad_norm": 32.0, "learning_rate": 2.1858895039626747e-05, "loss": 2.6937, "step": 30770 }, { "epoch": 0.9374493631866768, "grad_norm": 31.875, "learning_rate": 2.1840978979974534e-05, "loss": 2.6867, "step": 30780 }, { "epoch": 0.9377539276321566, "grad_norm": 31.875, "learning_rate": 2.1823148784364645e-05, "loss": 2.7126, "step": 30790 }, { "epoch": 0.9380584920776364, "grad_norm": 32.25, "learning_rate": 2.180540447016311e-05, "loss": 2.7048, "step": 30800 }, { "epoch": 0.9383630565231161, "grad_norm": 32.5, "learning_rate": 2.1787746054652335e-05, "loss": 2.7319, "step": 30810 }, { "epoch": 0.9386676209685958, "grad_norm": 31.125, "learning_rate": 2.1770173555031034e-05, "loss": 2.7297, "step": 30820 }, { "epoch": 0.9389721854140756, "grad_norm": 31.875, "learning_rate": 2.1752686988414277e-05, "loss": 2.7295, "step": 30830 }, { "epoch": 0.9392767498595553, "grad_norm": 31.75, "learning_rate": 2.1735286371833393e-05, "loss": 2.701, "step": 30840 }, { "epoch": 0.9395813143050351, "grad_norm": 30.875, "learning_rate": 2.1717971722236024e-05, "loss": 2.7001, "step": 30850 }, { "epoch": 0.9398858787505149, "grad_norm": 32.25, "learning_rate": 2.17007430564861e-05, "loss": 2.6987, "step": 30860 }, { "epoch": 0.9401904431959947, "grad_norm": 31.75, "learning_rate": 2.168360039136376e-05, "loss": 2.6778, "step": 30870 }, { "epoch": 0.9404950076414743, "grad_norm": 41.25, "learning_rate": 2.1666543743565417e-05, "loss": 2.7269, "step": 30880 }, { "epoch": 0.9407995720869541, "grad_norm": 30.875, "learning_rate": 2.16495731297037e-05, "loss": 2.6978, "step": 30890 }, { "epoch": 0.9411041365324339, "grad_norm": 32.75, "learning_rate": 2.1632688566307436e-05, "loss": 2.7046, "step": 30900 }, { "epoch": 0.9414087009779136, "grad_norm": 31.625, "learning_rate": 2.1615890069821648e-05, "loss": 2.7326, "step": 30910 }, { "epoch": 0.9417132654233934, "grad_norm": 32.75, "learning_rate": 2.159917765660751e-05, "loss": 2.7036, "step": 30920 }, { "epoch": 0.9420178298688731, "grad_norm": 32.75, "learning_rate": 2.1582551342942415e-05, "loss": 2.7352, "step": 30930 }, { "epoch": 0.9423223943143528, "grad_norm": 32.25, "learning_rate": 2.1566011145019816e-05, "loss": 2.7126, "step": 30940 }, { "epoch": 0.9426269587598326, "grad_norm": 31.875, "learning_rate": 2.154955707894935e-05, "loss": 2.7123, "step": 30950 }, { "epoch": 0.9429315232053124, "grad_norm": 32.25, "learning_rate": 2.1533189160756742e-05, "loss": 2.6991, "step": 30960 }, { "epoch": 0.9432360876507921, "grad_norm": 31.5, "learning_rate": 2.1516907406383812e-05, "loss": 2.6902, "step": 30970 }, { "epoch": 0.9435406520962719, "grad_norm": 31.75, "learning_rate": 2.150071183168848e-05, "loss": 2.7264, "step": 30980 }, { "epoch": 0.9438452165417516, "grad_norm": 32.0, "learning_rate": 2.1484602452444702e-05, "loss": 2.7263, "step": 30990 }, { "epoch": 0.9441497809872313, "grad_norm": 31.125, "learning_rate": 2.146857928434248e-05, "loss": 2.6704, "step": 31000 }, { "epoch": 0.9444543454327111, "grad_norm": 31.75, "learning_rate": 2.1452642342987893e-05, "loss": 2.6963, "step": 31010 }, { "epoch": 0.9447589098781909, "grad_norm": 34.0, "learning_rate": 2.1436791643902978e-05, "loss": 2.7083, "step": 31020 }, { "epoch": 0.9450634743236707, "grad_norm": 33.0, "learning_rate": 2.1421027202525833e-05, "loss": 2.7238, "step": 31030 }, { "epoch": 0.9453680387691504, "grad_norm": 31.375, "learning_rate": 2.14053490342105e-05, "loss": 2.6896, "step": 31040 }, { "epoch": 0.9456726032146301, "grad_norm": 32.75, "learning_rate": 2.1389757154226995e-05, "loss": 2.7474, "step": 31050 }, { "epoch": 0.9459771676601099, "grad_norm": 31.75, "learning_rate": 2.1374251577761335e-05, "loss": 2.7653, "step": 31060 }, { "epoch": 0.9462817321055896, "grad_norm": 31.5, "learning_rate": 2.1358832319915427e-05, "loss": 2.7582, "step": 31070 }, { "epoch": 0.9465862965510694, "grad_norm": 31.125, "learning_rate": 2.1343499395707156e-05, "loss": 2.7129, "step": 31080 }, { "epoch": 0.9468908609965492, "grad_norm": 32.75, "learning_rate": 2.1328252820070268e-05, "loss": 2.7539, "step": 31090 }, { "epoch": 0.947195425442029, "grad_norm": 32.25, "learning_rate": 2.1313092607854447e-05, "loss": 2.7275, "step": 31100 }, { "epoch": 0.9474999898875086, "grad_norm": 32.0, "learning_rate": 2.1298018773825256e-05, "loss": 2.7265, "step": 31110 }, { "epoch": 0.9478045543329884, "grad_norm": 32.25, "learning_rate": 2.1283031332664117e-05, "loss": 2.7173, "step": 31120 }, { "epoch": 0.9481091187784682, "grad_norm": 31.25, "learning_rate": 2.1268130298968318e-05, "loss": 2.6822, "step": 31130 }, { "epoch": 0.9484136832239479, "grad_norm": 32.5, "learning_rate": 2.125331568725099e-05, "loss": 2.6935, "step": 31140 }, { "epoch": 0.9487182476694277, "grad_norm": 32.5, "learning_rate": 2.1238587511941057e-05, "loss": 2.7357, "step": 31150 }, { "epoch": 0.9490228121149074, "grad_norm": 32.0, "learning_rate": 2.122394578738333e-05, "loss": 2.7154, "step": 31160 }, { "epoch": 0.9493273765603871, "grad_norm": 32.25, "learning_rate": 2.120939052783836e-05, "loss": 2.6861, "step": 31170 }, { "epoch": 0.9496319410058669, "grad_norm": 31.75, "learning_rate": 2.1194921747482473e-05, "loss": 2.7344, "step": 31180 }, { "epoch": 0.9499365054513467, "grad_norm": 31.875, "learning_rate": 2.1180539460407833e-05, "loss": 2.735, "step": 31190 }, { "epoch": 0.9502410698968264, "grad_norm": 34.5, "learning_rate": 2.1166243680622308e-05, "loss": 2.713, "step": 31200 }, { "epoch": 0.9505456343423062, "grad_norm": 32.5, "learning_rate": 2.1152034422049528e-05, "loss": 2.7033, "step": 31210 }, { "epoch": 0.9508501987877859, "grad_norm": 33.0, "learning_rate": 2.113791169852884e-05, "loss": 2.7068, "step": 31220 }, { "epoch": 0.9511547632332656, "grad_norm": 31.875, "learning_rate": 2.1123875523815343e-05, "loss": 2.7476, "step": 31230 }, { "epoch": 0.9514593276787454, "grad_norm": 32.25, "learning_rate": 2.11099259115798e-05, "loss": 2.742, "step": 31240 }, { "epoch": 0.9517638921242252, "grad_norm": 32.75, "learning_rate": 2.1096062875408685e-05, "loss": 2.7513, "step": 31250 }, { "epoch": 0.952068456569705, "grad_norm": 32.0, "learning_rate": 2.108228642880417e-05, "loss": 2.7223, "step": 31260 }, { "epoch": 0.9523730210151847, "grad_norm": 32.25, "learning_rate": 2.1068596585184037e-05, "loss": 2.713, "step": 31270 }, { "epoch": 0.9526775854606644, "grad_norm": 32.25, "learning_rate": 2.1054993357881783e-05, "loss": 2.7195, "step": 31280 }, { "epoch": 0.9529821499061442, "grad_norm": 31.25, "learning_rate": 2.104147676014649e-05, "loss": 2.7235, "step": 31290 }, { "epoch": 0.9532867143516239, "grad_norm": 32.75, "learning_rate": 2.1028046805142894e-05, "loss": 2.726, "step": 31300 }, { "epoch": 0.9535912787971037, "grad_norm": 32.0, "learning_rate": 2.1014703505951345e-05, "loss": 2.7328, "step": 31310 }, { "epoch": 0.9538958432425835, "grad_norm": 31.375, "learning_rate": 2.1001446875567777e-05, "loss": 2.7109, "step": 31320 }, { "epoch": 0.9542004076880632, "grad_norm": 31.0, "learning_rate": 2.098827692690372e-05, "loss": 2.705, "step": 31330 }, { "epoch": 0.9545049721335429, "grad_norm": 31.625, "learning_rate": 2.0975193672786292e-05, "loss": 2.7233, "step": 31340 }, { "epoch": 0.9548095365790227, "grad_norm": 31.625, "learning_rate": 2.0962197125958135e-05, "loss": 2.7035, "step": 31350 }, { "epoch": 0.9551141010245024, "grad_norm": 31.625, "learning_rate": 2.0949287299077502e-05, "loss": 2.7093, "step": 31360 }, { "epoch": 0.9554186654699822, "grad_norm": 32.0, "learning_rate": 2.0936464204718116e-05, "loss": 2.718, "step": 31370 }, { "epoch": 0.955723229915462, "grad_norm": 31.125, "learning_rate": 2.0923727855369267e-05, "loss": 2.7115, "step": 31380 }, { "epoch": 0.9560277943609417, "grad_norm": 31.75, "learning_rate": 2.0911078263435768e-05, "loss": 2.7336, "step": 31390 }, { "epoch": 0.9563323588064214, "grad_norm": 32.0, "learning_rate": 2.089851544123788e-05, "loss": 2.6948, "step": 31400 }, { "epoch": 0.9566369232519012, "grad_norm": 31.875, "learning_rate": 2.0886039401011414e-05, "loss": 2.7206, "step": 31410 }, { "epoch": 0.956941487697381, "grad_norm": 33.5, "learning_rate": 2.0873650154907618e-05, "loss": 2.7295, "step": 31420 }, { "epoch": 0.9572460521428607, "grad_norm": 30.875, "learning_rate": 2.0861347714993208e-05, "loss": 2.7013, "step": 31430 }, { "epoch": 0.9575506165883405, "grad_norm": 32.25, "learning_rate": 2.0849132093250387e-05, "loss": 2.6968, "step": 31440 }, { "epoch": 0.9578551810338202, "grad_norm": 31.375, "learning_rate": 2.0837003301576762e-05, "loss": 2.7038, "step": 31450 }, { "epoch": 0.9581597454792999, "grad_norm": 31.75, "learning_rate": 2.0824961351785395e-05, "loss": 2.7578, "step": 31460 }, { "epoch": 0.9584643099247797, "grad_norm": 31.5, "learning_rate": 2.0813006255604737e-05, "loss": 2.7277, "step": 31470 }, { "epoch": 0.9587688743702595, "grad_norm": 32.0, "learning_rate": 2.0801138024678678e-05, "loss": 2.717, "step": 31480 }, { "epoch": 0.9590734388157393, "grad_norm": 31.5, "learning_rate": 2.0789356670566505e-05, "loss": 2.7042, "step": 31490 }, { "epoch": 0.959378003261219, "grad_norm": 32.5, "learning_rate": 2.0777662204742858e-05, "loss": 2.6942, "step": 31500 }, { "epoch": 0.9596825677066987, "grad_norm": 31.0, "learning_rate": 2.076605463859777e-05, "loss": 2.7056, "step": 31510 }, { "epoch": 0.9599871321521785, "grad_norm": 31.75, "learning_rate": 2.0754533983436646e-05, "loss": 2.7039, "step": 31520 }, { "epoch": 0.9602916965976582, "grad_norm": 31.125, "learning_rate": 2.074310025048023e-05, "loss": 2.7424, "step": 31530 }, { "epoch": 0.960596261043138, "grad_norm": 31.75, "learning_rate": 2.0731753450864604e-05, "loss": 2.741, "step": 31540 }, { "epoch": 0.9609008254886178, "grad_norm": 33.0, "learning_rate": 2.0720493595641195e-05, "loss": 2.712, "step": 31550 }, { "epoch": 0.9612053899340975, "grad_norm": 32.5, "learning_rate": 2.0709320695776715e-05, "loss": 2.7288, "step": 31560 }, { "epoch": 0.9615099543795772, "grad_norm": 32.5, "learning_rate": 2.0698234762153233e-05, "loss": 2.7526, "step": 31570 }, { "epoch": 0.961814518825057, "grad_norm": 31.0, "learning_rate": 2.068723580556809e-05, "loss": 2.734, "step": 31580 }, { "epoch": 0.9621190832705367, "grad_norm": 33.25, "learning_rate": 2.06763238367339e-05, "loss": 2.7198, "step": 31590 }, { "epoch": 0.9624236477160165, "grad_norm": 35.25, "learning_rate": 2.0665498866278578e-05, "loss": 2.692, "step": 31600 }, { "epoch": 0.9627282121614963, "grad_norm": 32.25, "learning_rate": 2.065476090474529e-05, "loss": 2.7097, "step": 31610 }, { "epoch": 0.963032776606976, "grad_norm": 31.25, "learning_rate": 2.0644109962592476e-05, "loss": 2.7208, "step": 31620 }, { "epoch": 0.9633373410524557, "grad_norm": 32.25, "learning_rate": 2.0633546050193784e-05, "loss": 2.6802, "step": 31630 }, { "epoch": 0.9636419054979355, "grad_norm": 31.875, "learning_rate": 2.0623069177838163e-05, "loss": 2.7165, "step": 31640 }, { "epoch": 0.9639464699434153, "grad_norm": 31.625, "learning_rate": 2.0612679355729717e-05, "loss": 2.6774, "step": 31650 }, { "epoch": 0.964251034388895, "grad_norm": 32.5, "learning_rate": 2.0602376593987813e-05, "loss": 2.7175, "step": 31660 }, { "epoch": 0.9645555988343748, "grad_norm": 33.75, "learning_rate": 2.0592160902647e-05, "loss": 2.6955, "step": 31670 }, { "epoch": 0.9648601632798545, "grad_norm": 32.25, "learning_rate": 2.0582032291657026e-05, "loss": 2.7172, "step": 31680 }, { "epoch": 0.9651647277253342, "grad_norm": 32.5, "learning_rate": 2.057199077088285e-05, "loss": 2.7285, "step": 31690 }, { "epoch": 0.965469292170814, "grad_norm": 31.625, "learning_rate": 2.056203635010458e-05, "loss": 2.7136, "step": 31700 }, { "epoch": 0.9657738566162938, "grad_norm": 30.875, "learning_rate": 2.0552169039017503e-05, "loss": 2.7016, "step": 31710 }, { "epoch": 0.9660784210617736, "grad_norm": 31.5, "learning_rate": 2.0542388847232056e-05, "loss": 2.6802, "step": 31720 }, { "epoch": 0.9663829855072533, "grad_norm": 32.5, "learning_rate": 2.0532695784273827e-05, "loss": 2.7301, "step": 31730 }, { "epoch": 0.966687549952733, "grad_norm": 32.25, "learning_rate": 2.0523089859583567e-05, "loss": 2.698, "step": 31740 }, { "epoch": 0.9669921143982128, "grad_norm": 31.625, "learning_rate": 2.051357108251711e-05, "loss": 2.7076, "step": 31750 }, { "epoch": 0.9672966788436925, "grad_norm": 32.25, "learning_rate": 2.050413946234546e-05, "loss": 2.7003, "step": 31760 }, { "epoch": 0.9676012432891723, "grad_norm": 33.0, "learning_rate": 2.04947950082547e-05, "loss": 2.7069, "step": 31770 }, { "epoch": 0.9679058077346521, "grad_norm": 32.5, "learning_rate": 2.048553772934602e-05, "loss": 2.7171, "step": 31780 }, { "epoch": 0.9682103721801318, "grad_norm": 32.5, "learning_rate": 2.0476367634635725e-05, "loss": 2.7418, "step": 31790 }, { "epoch": 0.9685149366256115, "grad_norm": 32.25, "learning_rate": 2.0467284733055187e-05, "loss": 2.7102, "step": 31800 }, { "epoch": 0.9688195010710913, "grad_norm": 33.0, "learning_rate": 2.045828903345085e-05, "loss": 2.7066, "step": 31810 }, { "epoch": 0.969124065516571, "grad_norm": 31.625, "learning_rate": 2.044938054458425e-05, "loss": 2.7305, "step": 31820 }, { "epoch": 0.9694286299620508, "grad_norm": 32.75, "learning_rate": 2.044055927513196e-05, "loss": 2.7436, "step": 31830 }, { "epoch": 0.9697331944075306, "grad_norm": 31.0, "learning_rate": 2.0431825233685614e-05, "loss": 2.7143, "step": 31840 }, { "epoch": 0.9700377588530102, "grad_norm": 31.875, "learning_rate": 2.0423178428751887e-05, "loss": 2.7006, "step": 31850 }, { "epoch": 0.97034232329849, "grad_norm": 32.5, "learning_rate": 2.041461886875248e-05, "loss": 2.7276, "step": 31860 }, { "epoch": 0.9706468877439698, "grad_norm": 31.875, "learning_rate": 2.040614656202413e-05, "loss": 2.7251, "step": 31870 }, { "epoch": 0.9709514521894496, "grad_norm": 31.5, "learning_rate": 2.039776151681861e-05, "loss": 2.6952, "step": 31880 }, { "epoch": 0.9712560166349293, "grad_norm": 31.875, "learning_rate": 2.038946374130266e-05, "loss": 2.7325, "step": 31890 }, { "epoch": 0.9715605810804091, "grad_norm": 31.625, "learning_rate": 2.0381253243558055e-05, "loss": 2.7123, "step": 31900 }, { "epoch": 0.9718651455258888, "grad_norm": 32.25, "learning_rate": 2.037313003158155e-05, "loss": 2.748, "step": 31910 }, { "epoch": 0.9721697099713685, "grad_norm": 32.0, "learning_rate": 2.03650941132849e-05, "loss": 2.7289, "step": 31920 }, { "epoch": 0.9724742744168483, "grad_norm": 31.875, "learning_rate": 2.0357145496494825e-05, "loss": 2.7078, "step": 31930 }, { "epoch": 0.9727788388623281, "grad_norm": 32.25, "learning_rate": 2.034928418895302e-05, "loss": 2.7104, "step": 31940 }, { "epoch": 0.9730834033078078, "grad_norm": 33.0, "learning_rate": 2.0341510198316145e-05, "loss": 2.6984, "step": 31950 }, { "epoch": 0.9733879677532876, "grad_norm": 32.75, "learning_rate": 2.0333823532155823e-05, "loss": 2.7437, "step": 31960 }, { "epoch": 0.9736925321987673, "grad_norm": 33.5, "learning_rate": 2.0326224197958607e-05, "loss": 2.7434, "step": 31970 }, { "epoch": 0.973997096644247, "grad_norm": 34.0, "learning_rate": 2.031871220312602e-05, "loss": 2.7254, "step": 31980 }, { "epoch": 0.9743016610897268, "grad_norm": 32.25, "learning_rate": 2.0311287554974484e-05, "loss": 2.7351, "step": 31990 }, { "epoch": 0.9746062255352066, "grad_norm": 30.875, "learning_rate": 2.0303950260735377e-05, "loss": 2.7496, "step": 32000 }, { "epoch": 0.9749107899806864, "grad_norm": 31.375, "learning_rate": 2.029670032755499e-05, "loss": 2.7063, "step": 32010 }, { "epoch": 0.9752153544261661, "grad_norm": 31.875, "learning_rate": 2.028953776249452e-05, "loss": 2.7271, "step": 32020 }, { "epoch": 0.9755199188716458, "grad_norm": 31.625, "learning_rate": 2.028246257253007e-05, "loss": 2.6857, "step": 32030 }, { "epoch": 0.9758244833171256, "grad_norm": 31.5, "learning_rate": 2.0275474764552645e-05, "loss": 2.7174, "step": 32040 }, { "epoch": 0.9761290477626053, "grad_norm": 31.875, "learning_rate": 2.0268574345368164e-05, "loss": 2.7203, "step": 32050 }, { "epoch": 0.9764336122080851, "grad_norm": 32.25, "learning_rate": 2.0261761321697387e-05, "loss": 2.6827, "step": 32060 }, { "epoch": 0.9767381766535649, "grad_norm": 32.5, "learning_rate": 2.0255035700176002e-05, "loss": 2.715, "step": 32070 }, { "epoch": 0.9770427410990445, "grad_norm": 31.0, "learning_rate": 2.024839748735452e-05, "loss": 2.7251, "step": 32080 }, { "epoch": 0.9773473055445243, "grad_norm": 31.625, "learning_rate": 2.0241846689698375e-05, "loss": 2.6934, "step": 32090 }, { "epoch": 0.9776518699900041, "grad_norm": 31.75, "learning_rate": 2.0235383313587812e-05, "loss": 2.7141, "step": 32100 }, { "epoch": 0.9779564344354839, "grad_norm": 31.625, "learning_rate": 2.022900736531796e-05, "loss": 2.7192, "step": 32110 }, { "epoch": 0.9782609988809636, "grad_norm": 32.0, "learning_rate": 2.022271885109879e-05, "loss": 2.7444, "step": 32120 }, { "epoch": 0.9785655633264434, "grad_norm": 32.5, "learning_rate": 2.0216517777055112e-05, "loss": 2.726, "step": 32130 }, { "epoch": 0.9788701277719231, "grad_norm": 31.75, "learning_rate": 2.0210404149226554e-05, "loss": 2.7205, "step": 32140 }, { "epoch": 0.9791746922174028, "grad_norm": 30.75, "learning_rate": 2.0204377973567614e-05, "loss": 2.7297, "step": 32150 }, { "epoch": 0.9794792566628826, "grad_norm": 30.0, "learning_rate": 2.0198439255947586e-05, "loss": 2.6951, "step": 32160 }, { "epoch": 0.9797838211083624, "grad_norm": 34.0, "learning_rate": 2.019258800215059e-05, "loss": 2.705, "step": 32170 }, { "epoch": 0.9800883855538421, "grad_norm": 32.5, "learning_rate": 2.0186824217875553e-05, "loss": 2.7178, "step": 32180 }, { "epoch": 0.9803929499993219, "grad_norm": 32.25, "learning_rate": 2.018114790873622e-05, "loss": 2.7255, "step": 32190 }, { "epoch": 0.9806975144448016, "grad_norm": 31.625, "learning_rate": 2.0175559080261124e-05, "loss": 2.7517, "step": 32200 }, { "epoch": 0.9810020788902813, "grad_norm": 29.875, "learning_rate": 2.017005773789362e-05, "loss": 2.7075, "step": 32210 }, { "epoch": 0.9813066433357611, "grad_norm": 32.0, "learning_rate": 2.0164643886991822e-05, "loss": 2.7248, "step": 32220 }, { "epoch": 0.9816112077812409, "grad_norm": 32.5, "learning_rate": 2.0159317532828666e-05, "loss": 2.6964, "step": 32230 }, { "epoch": 0.9819157722267207, "grad_norm": 31.875, "learning_rate": 2.0154078680591824e-05, "loss": 2.7269, "step": 32240 }, { "epoch": 0.9822203366722004, "grad_norm": 33.25, "learning_rate": 2.0148927335383786e-05, "loss": 2.7189, "step": 32250 }, { "epoch": 0.9825249011176801, "grad_norm": 33.0, "learning_rate": 2.0143863502221787e-05, "loss": 2.7444, "step": 32260 }, { "epoch": 0.9828294655631599, "grad_norm": 30.875, "learning_rate": 2.013888718603784e-05, "loss": 2.682, "step": 32270 }, { "epoch": 0.9831340300086396, "grad_norm": 33.0, "learning_rate": 2.0133998391678723e-05, "loss": 2.7223, "step": 32280 }, { "epoch": 0.9834385944541194, "grad_norm": 32.5, "learning_rate": 2.0129197123905967e-05, "loss": 2.7521, "step": 32290 }, { "epoch": 0.9837431588995992, "grad_norm": 31.375, "learning_rate": 2.0124483387395838e-05, "loss": 2.7121, "step": 32300 }, { "epoch": 0.9840477233450788, "grad_norm": 32.0, "learning_rate": 2.011985718673936e-05, "loss": 2.7354, "step": 32310 }, { "epoch": 0.9843522877905586, "grad_norm": 31.625, "learning_rate": 2.0115318526442316e-05, "loss": 2.7196, "step": 32320 }, { "epoch": 0.9846568522360384, "grad_norm": 32.5, "learning_rate": 2.011086741092522e-05, "loss": 2.7287, "step": 32330 }, { "epoch": 0.9849614166815182, "grad_norm": 33.0, "learning_rate": 2.01065038445233e-05, "loss": 2.6921, "step": 32340 }, { "epoch": 0.9852659811269979, "grad_norm": 31.125, "learning_rate": 2.0102227831486528e-05, "loss": 2.7429, "step": 32350 }, { "epoch": 0.9855705455724777, "grad_norm": 31.0, "learning_rate": 2.0098039375979622e-05, "loss": 2.7329, "step": 32360 }, { "epoch": 0.9858751100179574, "grad_norm": 31.875, "learning_rate": 2.009393848208198e-05, "loss": 2.7041, "step": 32370 }, { "epoch": 0.9861796744634371, "grad_norm": 32.5, "learning_rate": 2.0089925153787753e-05, "loss": 2.7358, "step": 32380 }, { "epoch": 0.9864842389089169, "grad_norm": 32.25, "learning_rate": 2.00859993950058e-05, "loss": 2.7098, "step": 32390 }, { "epoch": 0.9867888033543967, "grad_norm": 31.125, "learning_rate": 2.008216120955966e-05, "loss": 2.7118, "step": 32400 }, { "epoch": 0.9870933677998764, "grad_norm": 31.375, "learning_rate": 2.0078410601187626e-05, "loss": 2.6996, "step": 32410 }, { "epoch": 0.9873979322453562, "grad_norm": 33.75, "learning_rate": 2.0074747573542656e-05, "loss": 2.7192, "step": 32420 }, { "epoch": 0.9877024966908359, "grad_norm": 31.125, "learning_rate": 2.007117213019243e-05, "loss": 2.7, "step": 32430 }, { "epoch": 0.9880070611363156, "grad_norm": 32.5, "learning_rate": 2.0067684274619297e-05, "loss": 2.7237, "step": 32440 }, { "epoch": 0.9883116255817954, "grad_norm": 31.75, "learning_rate": 2.0064284010220333e-05, "loss": 2.7423, "step": 32450 }, { "epoch": 0.9886161900272752, "grad_norm": 31.5, "learning_rate": 2.0060971340307285e-05, "loss": 2.7135, "step": 32460 }, { "epoch": 0.988920754472755, "grad_norm": 33.25, "learning_rate": 2.0057746268106574e-05, "loss": 2.6904, "step": 32470 }, { "epoch": 0.9892253189182347, "grad_norm": 33.0, "learning_rate": 2.005460879675933e-05, "loss": 2.7203, "step": 32480 }, { "epoch": 0.9895298833637144, "grad_norm": 30.75, "learning_rate": 2.0051558929321342e-05, "loss": 2.6902, "step": 32490 }, { "epoch": 0.9898344478091942, "grad_norm": 32.0, "learning_rate": 2.0048596668763085e-05, "loss": 2.7056, "step": 32500 }, { "epoch": 0.9901390122546739, "grad_norm": 31.875, "learning_rate": 2.00457220179697e-05, "loss": 2.6981, "step": 32510 }, { "epoch": 0.9904435767001537, "grad_norm": 32.0, "learning_rate": 2.0042934979741008e-05, "loss": 2.751, "step": 32520 }, { "epoch": 0.9907481411456335, "grad_norm": 31.125, "learning_rate": 2.0040235556791498e-05, "loss": 2.7017, "step": 32530 }, { "epoch": 0.9910527055911131, "grad_norm": 31.5, "learning_rate": 2.0037623751750315e-05, "loss": 2.7264, "step": 32540 }, { "epoch": 0.9913572700365929, "grad_norm": 31.875, "learning_rate": 2.0035099567161265e-05, "loss": 2.707, "step": 32550 }, { "epoch": 0.9916618344820727, "grad_norm": 31.625, "learning_rate": 2.0032663005482845e-05, "loss": 2.7152, "step": 32560 }, { "epoch": 0.9919663989275525, "grad_norm": 30.75, "learning_rate": 2.0030314069088173e-05, "loss": 2.6908, "step": 32570 }, { "epoch": 0.9922709633730322, "grad_norm": 31.625, "learning_rate": 2.002805276026504e-05, "loss": 2.6908, "step": 32580 }, { "epoch": 0.992575527818512, "grad_norm": 31.125, "learning_rate": 2.0025879081215887e-05, "loss": 2.6996, "step": 32590 }, { "epoch": 0.9928800922639917, "grad_norm": 32.75, "learning_rate": 2.0023793034057806e-05, "loss": 2.7081, "step": 32600 }, { "epoch": 0.9931846567094714, "grad_norm": 33.0, "learning_rate": 2.002179462082254e-05, "loss": 2.7223, "step": 32610 }, { "epoch": 0.9934892211549512, "grad_norm": 31.0, "learning_rate": 2.001988384345648e-05, "loss": 2.6856, "step": 32620 }, { "epoch": 0.993793785600431, "grad_norm": 31.5, "learning_rate": 2.001806070382066e-05, "loss": 2.7274, "step": 32630 }, { "epoch": 0.9940983500459107, "grad_norm": 31.75, "learning_rate": 2.0016325203690765e-05, "loss": 2.7353, "step": 32640 }, { "epoch": 0.9944029144913905, "grad_norm": 31.625, "learning_rate": 2.0014677344757108e-05, "loss": 2.7037, "step": 32650 }, { "epoch": 0.9947074789368702, "grad_norm": 33.0, "learning_rate": 2.0013117128624656e-05, "loss": 2.7186, "step": 32660 }, { "epoch": 0.9950120433823499, "grad_norm": 31.625, "learning_rate": 2.001164455681301e-05, "loss": 2.7252, "step": 32670 }, { "epoch": 0.9953166078278297, "grad_norm": 31.75, "learning_rate": 2.001025963075639e-05, "loss": 2.7333, "step": 32680 }, { "epoch": 0.9956211722733095, "grad_norm": 32.75, "learning_rate": 2.00089623518037e-05, "loss": 2.6849, "step": 32690 }, { "epoch": 0.9959257367187893, "grad_norm": 32.25, "learning_rate": 2.0007752721218424e-05, "loss": 2.6881, "step": 32700 }, { "epoch": 0.996230301164269, "grad_norm": 32.25, "learning_rate": 2.0006630740178716e-05, "loss": 2.7208, "step": 32710 }, { "epoch": 0.9965348656097487, "grad_norm": 31.75, "learning_rate": 2.0005596409777338e-05, "loss": 2.6903, "step": 32720 }, { "epoch": 0.9968394300552285, "grad_norm": 31.125, "learning_rate": 2.0004649731021714e-05, "loss": 2.6914, "step": 32730 }, { "epoch": 0.9971439945007082, "grad_norm": 32.25, "learning_rate": 2.0003790704833848e-05, "loss": 2.706, "step": 32740 }, { "epoch": 0.997448558946188, "grad_norm": 32.25, "learning_rate": 2.000301933205043e-05, "loss": 2.7184, "step": 32750 }, { "epoch": 0.9977531233916678, "grad_norm": 32.0, "learning_rate": 2.000233561342275e-05, "loss": 2.6878, "step": 32760 }, { "epoch": 0.9980576878371474, "grad_norm": 32.5, "learning_rate": 2.0001739549616717e-05, "loss": 2.6879, "step": 32770 }, { "epoch": 0.9983622522826272, "grad_norm": 31.625, "learning_rate": 2.000123114121288e-05, "loss": 2.734, "step": 32780 }, { "epoch": 0.998666816728107, "grad_norm": 61.0, "learning_rate": 2.0000810388706412e-05, "loss": 2.7327, "step": 32790 }, { "epoch": 0.9989713811735867, "grad_norm": 32.0, "learning_rate": 2.000047729250712e-05, "loss": 2.688, "step": 32800 }, { "epoch": 0.9992759456190665, "grad_norm": 33.25, "learning_rate": 2.000023185293942e-05, "loss": 2.7316, "step": 32810 }, { "epoch": 0.9995805100645463, "grad_norm": 31.875, "learning_rate": 2.0000074070242363e-05, "loss": 2.7248, "step": 32820 }, { "epoch": 0.999885074510026, "grad_norm": 33.5, "learning_rate": 2.0000003944569635e-05, "loss": 2.7316, "step": 32830 }, { "epoch": 0.9999764438436699, "step": 32833, "total_flos": 1.0716317475723333e+20, "train_loss": 2.5502642188082225, "train_runtime": 952126.1699, "train_samples_per_second": 8.828, "train_steps_per_second": 0.034 } ], "logging_steps": 10, "max_steps": 32833, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0716317475723333e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }