{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.8026137899954935,
"eval_steps": 500,
"global_step": 80000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002253267237494367,
"grad_norm": 388160.65625,
"learning_rate": 6.25e-07,
"loss": 54363.64,
"step": 100
},
{
"epoch": 0.004506534474988734,
"grad_norm": 168947.046875,
"learning_rate": 1.25e-06,
"loss": 31506.035,
"step": 200
},
{
"epoch": 0.0067598017124831005,
"grad_norm": 1104.2374267578125,
"learning_rate": 1.875e-06,
"loss": 1566.8063,
"step": 300
},
{
"epoch": 0.009013068949977467,
"grad_norm": 1060.940673828125,
"learning_rate": 2.5e-06,
"loss": 321.4997,
"step": 400
},
{
"epoch": 0.011266336187471835,
"grad_norm": 558.04296875,
"learning_rate": 3.125e-06,
"loss": 310.3988,
"step": 500
},
{
"epoch": 0.013519603424966201,
"grad_norm": 1218.569091796875,
"learning_rate": 3.75e-06,
"loss": 295.2427,
"step": 600
},
{
"epoch": 0.015772870662460567,
"grad_norm": 1162.5400390625,
"learning_rate": 4.375e-06,
"loss": 264.2469,
"step": 700
},
{
"epoch": 0.018026137899954935,
"grad_norm": 1011.2730102539062,
"learning_rate": 5e-06,
"loss": 251.2306,
"step": 800
},
{
"epoch": 0.020279405137449302,
"grad_norm": 2534.6201171875,
"learning_rate": 5.625e-06,
"loss": 233.5792,
"step": 900
},
{
"epoch": 0.02253267237494367,
"grad_norm": 968.279052734375,
"learning_rate": 6.25e-06,
"loss": 190.202,
"step": 1000
},
{
"epoch": 0.024785939612438034,
"grad_norm": 1023.931884765625,
"learning_rate": 6.875000000000001e-06,
"loss": 205.8357,
"step": 1100
},
{
"epoch": 0.027039206849932402,
"grad_norm": 921.0325927734375,
"learning_rate": 7.5e-06,
"loss": 204.0791,
"step": 1200
},
{
"epoch": 0.02929247408742677,
"grad_norm": 2596.747314453125,
"learning_rate": 8.125000000000001e-06,
"loss": 191.1482,
"step": 1300
},
{
"epoch": 0.031545741324921134,
"grad_norm": 7635.25634765625,
"learning_rate": 8.75e-06,
"loss": 202.4382,
"step": 1400
},
{
"epoch": 0.0337990085624155,
"grad_norm": 973.6043701171875,
"learning_rate": 9.375000000000001e-06,
"loss": 189.9945,
"step": 1500
},
{
"epoch": 0.03605227579990987,
"grad_norm": 1417.5162353515625,
"learning_rate": 1e-05,
"loss": 193.7698,
"step": 1600
},
{
"epoch": 0.03830554303740424,
"grad_norm": 1212.5467529296875,
"learning_rate": 1.0625e-05,
"loss": 170.7415,
"step": 1700
},
{
"epoch": 0.040558810274898605,
"grad_norm": 814.8417358398438,
"learning_rate": 1.125e-05,
"loss": 199.9652,
"step": 1800
},
{
"epoch": 0.04281207751239297,
"grad_norm": 2018.5306396484375,
"learning_rate": 1.1875e-05,
"loss": 181.4627,
"step": 1900
},
{
"epoch": 0.04506534474988734,
"grad_norm": 865.38427734375,
"learning_rate": 1.25e-05,
"loss": 178.8928,
"step": 2000
},
{
"epoch": 0.0473186119873817,
"grad_norm": 3710.726318359375,
"learning_rate": 1.3125e-05,
"loss": 187.2118,
"step": 2100
},
{
"epoch": 0.04957187922487607,
"grad_norm": 890.4869995117188,
"learning_rate": 1.3750000000000002e-05,
"loss": 175.1732,
"step": 2200
},
{
"epoch": 0.051825146462370436,
"grad_norm": 1558.48876953125,
"learning_rate": 1.4374999999999999e-05,
"loss": 182.1822,
"step": 2300
},
{
"epoch": 0.054078413699864804,
"grad_norm": 899.6353149414062,
"learning_rate": 1.5e-05,
"loss": 174.2233,
"step": 2400
},
{
"epoch": 0.05633168093735917,
"grad_norm": 2251.648193359375,
"learning_rate": 1.5625e-05,
"loss": 171.1752,
"step": 2500
},
{
"epoch": 0.05858494817485354,
"grad_norm": 12215.8701171875,
"learning_rate": 1.6250000000000002e-05,
"loss": 164.0133,
"step": 2600
},
{
"epoch": 0.06083821541234791,
"grad_norm": 9239.3564453125,
"learning_rate": 1.6875000000000004e-05,
"loss": 162.7651,
"step": 2700
},
{
"epoch": 0.06309148264984227,
"grad_norm": 1981.06494140625,
"learning_rate": 1.75e-05,
"loss": 149.4263,
"step": 2800
},
{
"epoch": 0.06534474988733664,
"grad_norm": 1472.9056396484375,
"learning_rate": 1.8125e-05,
"loss": 139.6899,
"step": 2900
},
{
"epoch": 0.067598017124831,
"grad_norm": 840.9261474609375,
"learning_rate": 1.8750000000000002e-05,
"loss": 147.1371,
"step": 3000
},
{
"epoch": 0.06985128436232538,
"grad_norm": 1072.093994140625,
"learning_rate": 1.9375e-05,
"loss": 136.6015,
"step": 3100
},
{
"epoch": 0.07210455159981974,
"grad_norm": 1688.5560302734375,
"learning_rate": 2e-05,
"loss": 143.6873,
"step": 3200
},
{
"epoch": 0.0743578188373141,
"grad_norm": 2323.202880859375,
"learning_rate": 2.0625e-05,
"loss": 133.3337,
"step": 3300
},
{
"epoch": 0.07661108607480847,
"grad_norm": 7761.54638671875,
"learning_rate": 2.125e-05,
"loss": 130.355,
"step": 3400
},
{
"epoch": 0.07886435331230283,
"grad_norm": 2193.791015625,
"learning_rate": 2.1875e-05,
"loss": 140.9645,
"step": 3500
},
{
"epoch": 0.08111762054979721,
"grad_norm": 1536.9178466796875,
"learning_rate": 2.25e-05,
"loss": 123.8844,
"step": 3600
},
{
"epoch": 0.08337088778729157,
"grad_norm": 1548.496826171875,
"learning_rate": 2.3125000000000003e-05,
"loss": 128.6075,
"step": 3700
},
{
"epoch": 0.08562415502478594,
"grad_norm": 1170.0714111328125,
"learning_rate": 2.375e-05,
"loss": 126.8377,
"step": 3800
},
{
"epoch": 0.0878774222622803,
"grad_norm": 756.2473754882812,
"learning_rate": 2.4375e-05,
"loss": 125.7238,
"step": 3900
},
{
"epoch": 0.09013068949977468,
"grad_norm": 456.4137268066406,
"learning_rate": 2.5e-05,
"loss": 120.9195,
"step": 4000
},
{
"epoch": 0.09238395673726904,
"grad_norm": 2030.7421875,
"learning_rate": 2.5625e-05,
"loss": 115.6586,
"step": 4100
},
{
"epoch": 0.0946372239747634,
"grad_norm": 7973.09765625,
"learning_rate": 2.625e-05,
"loss": 117.5762,
"step": 4200
},
{
"epoch": 0.09689049121225778,
"grad_norm": 950.62060546875,
"learning_rate": 2.6875e-05,
"loss": 103.6103,
"step": 4300
},
{
"epoch": 0.09914375844975214,
"grad_norm": 825.5482177734375,
"learning_rate": 2.7500000000000004e-05,
"loss": 117.7064,
"step": 4400
},
{
"epoch": 0.10139702568724651,
"grad_norm": 4125.6982421875,
"learning_rate": 2.8125000000000003e-05,
"loss": 123.1462,
"step": 4500
},
{
"epoch": 0.10365029292474087,
"grad_norm": 5001.68212890625,
"learning_rate": 2.8749999999999997e-05,
"loss": 115.605,
"step": 4600
},
{
"epoch": 0.10590356016223525,
"grad_norm": 1243.29150390625,
"learning_rate": 2.9375000000000003e-05,
"loss": 117.9468,
"step": 4700
},
{
"epoch": 0.10815682739972961,
"grad_norm": 1567.5623779296875,
"learning_rate": 3e-05,
"loss": 108.8766,
"step": 4800
},
{
"epoch": 0.11041009463722397,
"grad_norm": 623.7001953125,
"learning_rate": 3.0625000000000006e-05,
"loss": 112.1779,
"step": 4900
},
{
"epoch": 0.11266336187471834,
"grad_norm": 923.6962890625,
"learning_rate": 3.125e-05,
"loss": 103.1436,
"step": 5000
},
{
"epoch": 0.1149166291122127,
"grad_norm": 1227.22802734375,
"learning_rate": 3.1875e-05,
"loss": 104.1329,
"step": 5100
},
{
"epoch": 0.11716989634970708,
"grad_norm": 1491.0849609375,
"learning_rate": 3.2500000000000004e-05,
"loss": 97.9254,
"step": 5200
},
{
"epoch": 0.11942316358720144,
"grad_norm": 0.0,
"learning_rate": 3.3125e-05,
"loss": 102.5234,
"step": 5300
},
{
"epoch": 0.12167643082469581,
"grad_norm": 1849.3809814453125,
"learning_rate": 3.375000000000001e-05,
"loss": 106.2018,
"step": 5400
},
{
"epoch": 0.12392969806219017,
"grad_norm": 1325.9857177734375,
"learning_rate": 3.4375e-05,
"loss": 109.5124,
"step": 5500
},
{
"epoch": 0.12618296529968454,
"grad_norm": 1684.9053955078125,
"learning_rate": 3.5e-05,
"loss": 100.4184,
"step": 5600
},
{
"epoch": 0.1284362325371789,
"grad_norm": 2124.258544921875,
"learning_rate": 3.5625000000000005e-05,
"loss": 106.471,
"step": 5700
},
{
"epoch": 0.13068949977467328,
"grad_norm": 771.2640380859375,
"learning_rate": 3.625e-05,
"loss": 102.0049,
"step": 5800
},
{
"epoch": 0.13294276701216765,
"grad_norm": 822.8856811523438,
"learning_rate": 3.6875e-05,
"loss": 105.0164,
"step": 5900
},
{
"epoch": 0.135196034249662,
"grad_norm": 1594.832763671875,
"learning_rate": 3.7500000000000003e-05,
"loss": 95.0338,
"step": 6000
},
{
"epoch": 0.13744930148715637,
"grad_norm": 2976.5205078125,
"learning_rate": 3.8125e-05,
"loss": 92.8634,
"step": 6100
},
{
"epoch": 0.13970256872465076,
"grad_norm": 1695.48681640625,
"learning_rate": 3.875e-05,
"loss": 91.5781,
"step": 6200
},
{
"epoch": 0.14195583596214512,
"grad_norm": 990.6913452148438,
"learning_rate": 3.9375e-05,
"loss": 90.6831,
"step": 6300
},
{
"epoch": 0.14420910319963948,
"grad_norm": 1220.7005615234375,
"learning_rate": 4e-05,
"loss": 91.974,
"step": 6400
},
{
"epoch": 0.14646237043713384,
"grad_norm": 4140.04345703125,
"learning_rate": 4.0625000000000005e-05,
"loss": 86.4978,
"step": 6500
},
{
"epoch": 0.1487156376746282,
"grad_norm": 2063.628173828125,
"learning_rate": 4.125e-05,
"loss": 93.2286,
"step": 6600
},
{
"epoch": 0.1509689049121226,
"grad_norm": 1446.00439453125,
"learning_rate": 4.1875e-05,
"loss": 85.1075,
"step": 6700
},
{
"epoch": 0.15322217214961695,
"grad_norm": 1431.2410888671875,
"learning_rate": 4.25e-05,
"loss": 90.0661,
"step": 6800
},
{
"epoch": 0.1554754393871113,
"grad_norm": 4932.36572265625,
"learning_rate": 4.3125000000000005e-05,
"loss": 90.6297,
"step": 6900
},
{
"epoch": 0.15772870662460567,
"grad_norm": 765.8735961914062,
"learning_rate": 4.375e-05,
"loss": 80.5397,
"step": 7000
},
{
"epoch": 0.15998197386210006,
"grad_norm": 4119.40673828125,
"learning_rate": 4.4375e-05,
"loss": 81.6614,
"step": 7100
},
{
"epoch": 0.16223524109959442,
"grad_norm": 1764.3875732421875,
"learning_rate": 4.5e-05,
"loss": 88.7843,
"step": 7200
},
{
"epoch": 0.16448850833708878,
"grad_norm": 762.2324829101562,
"learning_rate": 4.5625e-05,
"loss": 82.4272,
"step": 7300
},
{
"epoch": 0.16674177557458314,
"grad_norm": 1424.8287353515625,
"learning_rate": 4.6250000000000006e-05,
"loss": 90.781,
"step": 7400
},
{
"epoch": 0.1689950428120775,
"grad_norm": 1720.607421875,
"learning_rate": 4.6875e-05,
"loss": 83.0496,
"step": 7500
},
{
"epoch": 0.1712483100495719,
"grad_norm": 2539.0263671875,
"learning_rate": 4.75e-05,
"loss": 79.4029,
"step": 7600
},
{
"epoch": 0.17350157728706625,
"grad_norm": 6733.1591796875,
"learning_rate": 4.8125000000000004e-05,
"loss": 80.8961,
"step": 7700
},
{
"epoch": 0.1757548445245606,
"grad_norm": 3785.265869140625,
"learning_rate": 4.875e-05,
"loss": 77.2641,
"step": 7800
},
{
"epoch": 0.17800811176205497,
"grad_norm": 820.4760131835938,
"learning_rate": 4.937500000000001e-05,
"loss": 82.2043,
"step": 7900
},
{
"epoch": 0.18026137899954936,
"grad_norm": 717.660400390625,
"learning_rate": 5e-05,
"loss": 80.1946,
"step": 8000
},
{
"epoch": 0.18251464623704372,
"grad_norm": 1224.3692626953125,
"learning_rate": 4.999976201801837e-05,
"loss": 79.4519,
"step": 8100
},
{
"epoch": 0.18476791347453808,
"grad_norm": 974.4324951171875,
"learning_rate": 4.999904807660428e-05,
"loss": 75.99,
"step": 8200
},
{
"epoch": 0.18702118071203244,
"grad_norm": 3050.068359375,
"learning_rate": 4.999785818935018e-05,
"loss": 76.9602,
"step": 8300
},
{
"epoch": 0.1892744479495268,
"grad_norm": 2585.671630859375,
"learning_rate": 4.9996192378909786e-05,
"loss": 76.214,
"step": 8400
},
{
"epoch": 0.1915277151870212,
"grad_norm": 1460.472412109375,
"learning_rate": 4.999405067699773e-05,
"loss": 77.366,
"step": 8500
},
{
"epoch": 0.19378098242451555,
"grad_norm": 1475.039306640625,
"learning_rate": 4.999143312438893e-05,
"loss": 68.2295,
"step": 8600
},
{
"epoch": 0.1960342496620099,
"grad_norm": 1217.306396484375,
"learning_rate": 4.9988339770917825e-05,
"loss": 75.2926,
"step": 8700
},
{
"epoch": 0.19828751689950427,
"grad_norm": 1172.7359619140625,
"learning_rate": 4.99847706754774e-05,
"loss": 73.9257,
"step": 8800
},
{
"epoch": 0.20054078413699863,
"grad_norm": 1494.612548828125,
"learning_rate": 4.9980725906018074e-05,
"loss": 73.2696,
"step": 8900
},
{
"epoch": 0.20279405137449302,
"grad_norm": 698.787353515625,
"learning_rate": 4.997620553954645e-05,
"loss": 75.0695,
"step": 9000
},
{
"epoch": 0.20504731861198738,
"grad_norm": 1705.808349609375,
"learning_rate": 4.997120966212377e-05,
"loss": 69.9942,
"step": 9100
},
{
"epoch": 0.20730058584948174,
"grad_norm": 1201.5196533203125,
"learning_rate": 4.996573836886435e-05,
"loss": 72.2949,
"step": 9200
},
{
"epoch": 0.2095538530869761,
"grad_norm": 658.273193359375,
"learning_rate": 4.995979176393372e-05,
"loss": 72.9935,
"step": 9300
},
{
"epoch": 0.2118071203244705,
"grad_norm": 1103.2303466796875,
"learning_rate": 4.9953369960546676e-05,
"loss": 80.898,
"step": 9400
},
{
"epoch": 0.21406038756196485,
"grad_norm": 872.4283447265625,
"learning_rate": 4.994647308096509e-05,
"loss": 70.9107,
"step": 9500
},
{
"epoch": 0.21631365479945922,
"grad_norm": 2087.058349609375,
"learning_rate": 4.993910125649561e-05,
"loss": 73.337,
"step": 9600
},
{
"epoch": 0.21856692203695358,
"grad_norm": 679.9100952148438,
"learning_rate": 4.9931254627487145e-05,
"loss": 62.9499,
"step": 9700
},
{
"epoch": 0.22082018927444794,
"grad_norm": 2349.733154296875,
"learning_rate": 4.99229333433282e-05,
"loss": 78.2534,
"step": 9800
},
{
"epoch": 0.22307345651194233,
"grad_norm": 942.2659912109375,
"learning_rate": 4.9914137562444044e-05,
"loss": 72.2439,
"step": 9900
},
{
"epoch": 0.22532672374943669,
"grad_norm": 4470.63916015625,
"learning_rate": 4.990486745229364e-05,
"loss": 70.3106,
"step": 10000
},
{
"epoch": 0.22757999098693105,
"grad_norm": 1237.9471435546875,
"learning_rate": 4.989512318936655e-05,
"loss": 71.3457,
"step": 10100
},
{
"epoch": 0.2298332582244254,
"grad_norm": 1329.12060546875,
"learning_rate": 4.988490495917947e-05,
"loss": 67.5981,
"step": 10200
},
{
"epoch": 0.2320865254619198,
"grad_norm": 935.1795654296875,
"learning_rate": 4.987421295627279e-05,
"loss": 76.1853,
"step": 10300
},
{
"epoch": 0.23433979269941416,
"grad_norm": 1066.6749267578125,
"learning_rate": 4.9863047384206835e-05,
"loss": 71.3548,
"step": 10400
},
{
"epoch": 0.23659305993690852,
"grad_norm": 1230.3519287109375,
"learning_rate": 4.985140845555799e-05,
"loss": 74.292,
"step": 10500
},
{
"epoch": 0.23884632717440288,
"grad_norm": 1099.618408203125,
"learning_rate": 4.983929639191469e-05,
"loss": 69.2547,
"step": 10600
},
{
"epoch": 0.24109959441189724,
"grad_norm": 1077.372314453125,
"learning_rate": 4.982671142387316e-05,
"loss": 65.676,
"step": 10700
},
{
"epoch": 0.24335286164939163,
"grad_norm": 0.0,
"learning_rate": 4.9813653791033057e-05,
"loss": 69.037,
"step": 10800
},
{
"epoch": 0.245606128886886,
"grad_norm": 923.2297973632812,
"learning_rate": 4.980012374199288e-05,
"loss": 62.7163,
"step": 10900
},
{
"epoch": 0.24785939612438035,
"grad_norm": 672.335693359375,
"learning_rate": 4.9786121534345265e-05,
"loss": 64.182,
"step": 11000
},
{
"epoch": 0.2501126633618747,
"grad_norm": 2046.09326171875,
"learning_rate": 4.977164743467206e-05,
"loss": 69.5933,
"step": 11100
},
{
"epoch": 0.25236593059936907,
"grad_norm": 466.8750305175781,
"learning_rate": 4.975670171853926e-05,
"loss": 68.209,
"step": 11200
},
{
"epoch": 0.25461919783686343,
"grad_norm": 1894.697998046875,
"learning_rate": 4.974128467049176e-05,
"loss": 67.2639,
"step": 11300
},
{
"epoch": 0.2568724650743578,
"grad_norm": 1386.5361328125,
"learning_rate": 4.9725396584047925e-05,
"loss": 69.1241,
"step": 11400
},
{
"epoch": 0.2591257323118522,
"grad_norm": 2561.32275390625,
"learning_rate": 4.970903776169402e-05,
"loss": 68.5936,
"step": 11500
},
{
"epoch": 0.26137899954934657,
"grad_norm": 1006.3966064453125,
"learning_rate": 4.9692208514878444e-05,
"loss": 63.8572,
"step": 11600
},
{
"epoch": 0.26363226678684093,
"grad_norm": 1034.1533203125,
"learning_rate": 4.96749091640058e-05,
"loss": 60.131,
"step": 11700
},
{
"epoch": 0.2658855340243353,
"grad_norm": 468.8936462402344,
"learning_rate": 4.965714003843079e-05,
"loss": 62.9078,
"step": 11800
},
{
"epoch": 0.26813880126182965,
"grad_norm": 545.410888671875,
"learning_rate": 4.9638901476451946e-05,
"loss": 66.2326,
"step": 11900
},
{
"epoch": 0.270392068499324,
"grad_norm": 4246.572265625,
"learning_rate": 4.962019382530521e-05,
"loss": 66.3811,
"step": 12000
},
{
"epoch": 0.2726453357368184,
"grad_norm": 1565.358642578125,
"learning_rate": 4.960101744115727e-05,
"loss": 64.098,
"step": 12100
},
{
"epoch": 0.27489860297431273,
"grad_norm": 949.9479370117188,
"learning_rate": 4.958137268909887e-05,
"loss": 63.714,
"step": 12200
},
{
"epoch": 0.2771518702118071,
"grad_norm": 1055.019775390625,
"learning_rate": 4.956125994313774e-05,
"loss": 69.4046,
"step": 12300
},
{
"epoch": 0.2794051374493015,
"grad_norm": 2204.83935546875,
"learning_rate": 4.9540679586191605e-05,
"loss": 62.6045,
"step": 12400
},
{
"epoch": 0.28165840468679587,
"grad_norm": 607.0169067382812,
"learning_rate": 4.951963201008076e-05,
"loss": 62.7449,
"step": 12500
},
{
"epoch": 0.28391167192429023,
"grad_norm": 1606.60888671875,
"learning_rate": 4.949811761552074e-05,
"loss": 64.348,
"step": 12600
},
{
"epoch": 0.2861649391617846,
"grad_norm": 1324.9617919921875,
"learning_rate": 4.94761368121146e-05,
"loss": 63.6378,
"step": 12700
},
{
"epoch": 0.28841820639927895,
"grad_norm": 614.9827270507812,
"learning_rate": 4.9453690018345144e-05,
"loss": 64.5319,
"step": 12800
},
{
"epoch": 0.2906714736367733,
"grad_norm": 620.3219604492188,
"learning_rate": 4.943077766156697e-05,
"loss": 60.6608,
"step": 12900
},
{
"epoch": 0.2929247408742677,
"grad_norm": 1146.55126953125,
"learning_rate": 4.940740017799833e-05,
"loss": 60.1688,
"step": 13000
},
{
"epoch": 0.29517800811176204,
"grad_norm": 459.2867126464844,
"learning_rate": 4.938355801271282e-05,
"loss": 67.1272,
"step": 13100
},
{
"epoch": 0.2974312753492564,
"grad_norm": 1162.4820556640625,
"learning_rate": 4.9359251619630886e-05,
"loss": 59.3066,
"step": 13200
},
{
"epoch": 0.2996845425867508,
"grad_norm": 718.1458129882812,
"learning_rate": 4.9334481461511215e-05,
"loss": 62.1788,
"step": 13300
},
{
"epoch": 0.3019378098242452,
"grad_norm": 1042.899169921875,
"learning_rate": 4.9309248009941914e-05,
"loss": 63.2037,
"step": 13400
},
{
"epoch": 0.30419107706173953,
"grad_norm": 350.5531005859375,
"learning_rate": 4.9283551745331534e-05,
"loss": 60.271,
"step": 13500
},
{
"epoch": 0.3064443442992339,
"grad_norm": 622.0228271484375,
"learning_rate": 4.925739315689991e-05,
"loss": 58.0221,
"step": 13600
},
{
"epoch": 0.30869761153672826,
"grad_norm": 588.911865234375,
"learning_rate": 4.9230772742668866e-05,
"loss": 58.5713,
"step": 13700
},
{
"epoch": 0.3109508787742226,
"grad_norm": 1807.9488525390625,
"learning_rate": 4.92036910094527e-05,
"loss": 63.6907,
"step": 13800
},
{
"epoch": 0.313204146011717,
"grad_norm": 1166.9796142578125,
"learning_rate": 4.9176148472848584e-05,
"loss": 68.026,
"step": 13900
},
{
"epoch": 0.31545741324921134,
"grad_norm": 1430.736572265625,
"learning_rate": 4.914814565722671e-05,
"loss": 60.4505,
"step": 14000
},
{
"epoch": 0.3177106804867057,
"grad_norm": 678.8543701171875,
"learning_rate": 4.9119683095720324e-05,
"loss": 58.989,
"step": 14100
},
{
"epoch": 0.3199639477242001,
"grad_norm": 1388.1710205078125,
"learning_rate": 4.909076133021557e-05,
"loss": 61.1404,
"step": 14200
},
{
"epoch": 0.3222172149616945,
"grad_norm": 1086.327392578125,
"learning_rate": 4.906138091134118e-05,
"loss": 60.7471,
"step": 14300
},
{
"epoch": 0.32447048219918884,
"grad_norm": 1210.8642578125,
"learning_rate": 4.9031542398457974e-05,
"loss": 58.4001,
"step": 14400
},
{
"epoch": 0.3267237494366832,
"grad_norm": 2312.1982421875,
"learning_rate": 4.9001246359648224e-05,
"loss": 62.6361,
"step": 14500
},
{
"epoch": 0.32897701667417756,
"grad_norm": 1599.8291015625,
"learning_rate": 4.8970493371704826e-05,
"loss": 56.6325,
"step": 14600
},
{
"epoch": 0.3312302839116719,
"grad_norm": 1875.2379150390625,
"learning_rate": 4.8939284020120363e-05,
"loss": 62.8075,
"step": 14700
},
{
"epoch": 0.3334835511491663,
"grad_norm": 767.9666748046875,
"learning_rate": 4.890761889907589e-05,
"loss": 60.694,
"step": 14800
},
{
"epoch": 0.33573681838666064,
"grad_norm": 827.0344848632812,
"learning_rate": 4.8875498611429674e-05,
"loss": 62.9213,
"step": 14900
},
{
"epoch": 0.337990085624155,
"grad_norm": 536.530029296875,
"learning_rate": 4.884292376870567e-05,
"loss": 63.3655,
"step": 15000
},
{
"epoch": 0.3402433528616494,
"grad_norm": 722.9906005859375,
"learning_rate": 4.8809894991081964e-05,
"loss": 59.6742,
"step": 15100
},
{
"epoch": 0.3424966200991438,
"grad_norm": 514.8473510742188,
"learning_rate": 4.877641290737884e-05,
"loss": 54.7803,
"step": 15200
},
{
"epoch": 0.34474988733663814,
"grad_norm": 504.1208801269531,
"learning_rate": 4.874247815504693e-05,
"loss": 62.341,
"step": 15300
},
{
"epoch": 0.3470031545741325,
"grad_norm": 1017.212646484375,
"learning_rate": 4.8708091380154984e-05,
"loss": 60.4433,
"step": 15400
},
{
"epoch": 0.34925642181162686,
"grad_norm": 1013.2379760742188,
"learning_rate": 4.867325323737765e-05,
"loss": 60.4723,
"step": 15500
},
{
"epoch": 0.3515096890491212,
"grad_norm": 1699.07861328125,
"learning_rate": 4.8637964389982926e-05,
"loss": 58.7195,
"step": 15600
},
{
"epoch": 0.3537629562866156,
"grad_norm": 672.705078125,
"learning_rate": 4.860222550981961e-05,
"loss": 58.9785,
"step": 15700
},
{
"epoch": 0.35601622352410994,
"grad_norm": 1399.3095703125,
"learning_rate": 4.856603727730447e-05,
"loss": 63.84,
"step": 15800
},
{
"epoch": 0.3582694907616043,
"grad_norm": 885.7625122070312,
"learning_rate": 4.852940038140927e-05,
"loss": 61.7109,
"step": 15900
},
{
"epoch": 0.3605227579990987,
"grad_norm": 943.9271240234375,
"learning_rate": 4.849231551964771e-05,
"loss": 57.4755,
"step": 16000
},
{
"epoch": 0.3627760252365931,
"grad_norm": 696.8076782226562,
"learning_rate": 4.8454783398062106e-05,
"loss": 60.5156,
"step": 16100
},
{
"epoch": 0.36502929247408744,
"grad_norm": 0.0,
"learning_rate": 4.8416804731209945e-05,
"loss": 57.4523,
"step": 16200
},
{
"epoch": 0.3672825597115818,
"grad_norm": 1032.58154296875,
"learning_rate": 4.83783802421503e-05,
"loss": 59.4625,
"step": 16300
},
{
"epoch": 0.36953582694907616,
"grad_norm": 977.2448120117188,
"learning_rate": 4.8339510662430046e-05,
"loss": 56.669,
"step": 16400
},
{
"epoch": 0.3717890941865705,
"grad_norm": 674.7769165039062,
"learning_rate": 4.830019673206997e-05,
"loss": 56.777,
"step": 16500
},
{
"epoch": 0.3740423614240649,
"grad_norm": 477.216552734375,
"learning_rate": 4.826043919955062e-05,
"loss": 58.6639,
"step": 16600
},
{
"epoch": 0.37629562866155924,
"grad_norm": 350.3633728027344,
"learning_rate": 4.822023882179811e-05,
"loss": 62.8859,
"step": 16700
},
{
"epoch": 0.3785488958990536,
"grad_norm": 2964.301025390625,
"learning_rate": 4.817959636416969e-05,
"loss": 60.454,
"step": 16800
},
{
"epoch": 0.380802163136548,
"grad_norm": 1536.1146240234375,
"learning_rate": 4.813851260043916e-05,
"loss": 59.736,
"step": 16900
},
{
"epoch": 0.3830554303740424,
"grad_norm": 477.4852600097656,
"learning_rate": 4.8096988312782174e-05,
"loss": 61.6154,
"step": 17000
},
{
"epoch": 0.38530869761153674,
"grad_norm": 1278.5770263671875,
"learning_rate": 4.80550242917613e-05,
"loss": 60.3102,
"step": 17100
},
{
"epoch": 0.3875619648490311,
"grad_norm": 345.54974365234375,
"learning_rate": 4.8012621336311016e-05,
"loss": 52.0934,
"step": 17200
},
{
"epoch": 0.38981523208652546,
"grad_norm": 1254.111083984375,
"learning_rate": 4.796978025372246e-05,
"loss": 54.1746,
"step": 17300
},
{
"epoch": 0.3920684993240198,
"grad_norm": 2557.150146484375,
"learning_rate": 4.79265018596281e-05,
"loss": 61.9285,
"step": 17400
},
{
"epoch": 0.3943217665615142,
"grad_norm": 1335.9613037109375,
"learning_rate": 4.788278697798618e-05,
"loss": 54.5102,
"step": 17500
},
{
"epoch": 0.39657503379900855,
"grad_norm": 1397.8980712890625,
"learning_rate": 4.783863644106502e-05,
"loss": 56.5746,
"step": 17600
},
{
"epoch": 0.3988283010365029,
"grad_norm": 1394.6300048828125,
"learning_rate": 4.7794051089427214e-05,
"loss": 62.5688,
"step": 17700
},
{
"epoch": 0.40108156827399727,
"grad_norm": 1244.0089111328125,
"learning_rate": 4.7749031771913584e-05,
"loss": 55.5008,
"step": 17800
},
{
"epoch": 0.4033348355114917,
"grad_norm": 1220.64208984375,
"learning_rate": 4.7703579345627035e-05,
"loss": 58.7905,
"step": 17900
},
{
"epoch": 0.40558810274898605,
"grad_norm": 897.0527954101562,
"learning_rate": 4.765769467591625e-05,
"loss": 57.8451,
"step": 18000
},
{
"epoch": 0.4078413699864804,
"grad_norm": 2359.15380859375,
"learning_rate": 4.761137863635921e-05,
"loss": 55.2025,
"step": 18100
},
{
"epoch": 0.41009463722397477,
"grad_norm": 1171.134765625,
"learning_rate": 4.756463210874652e-05,
"loss": 58.9177,
"step": 18200
},
{
"epoch": 0.41234790446146913,
"grad_norm": 1182.134521484375,
"learning_rate": 4.7517455983064694e-05,
"loss": 58.9359,
"step": 18300
},
{
"epoch": 0.4146011716989635,
"grad_norm": 866.15478515625,
"learning_rate": 4.7469851157479177e-05,
"loss": 60.2409,
"step": 18400
},
{
"epoch": 0.41685443893645785,
"grad_norm": 929.9954833984375,
"learning_rate": 4.742181853831721e-05,
"loss": 59.9906,
"step": 18500
},
{
"epoch": 0.4191077061739522,
"grad_norm": 862.0828247070312,
"learning_rate": 4.737335904005063e-05,
"loss": 53.0986,
"step": 18600
},
{
"epoch": 0.42136097341144657,
"grad_norm": 1020.1170654296875,
"learning_rate": 4.732447358527843e-05,
"loss": 53.5393,
"step": 18700
},
{
"epoch": 0.423614240648941,
"grad_norm": 963.4601440429688,
"learning_rate": 4.72751631047092e-05,
"loss": 53.7475,
"step": 18800
},
{
"epoch": 0.42586750788643535,
"grad_norm": 935.1727294921875,
"learning_rate": 4.722542853714341e-05,
"loss": 58.3295,
"step": 18900
},
{
"epoch": 0.4281207751239297,
"grad_norm": 1638.548828125,
"learning_rate": 4.717527082945554e-05,
"loss": 54.7367,
"step": 19000
},
{
"epoch": 0.43037404236142407,
"grad_norm": 1193.1241455078125,
"learning_rate": 4.712469093657605e-05,
"loss": 55.434,
"step": 19100
},
{
"epoch": 0.43262730959891843,
"grad_norm": 1653.6728515625,
"learning_rate": 4.707368982147318e-05,
"loss": 59.1439,
"step": 19200
},
{
"epoch": 0.4348805768364128,
"grad_norm": 880.736572265625,
"learning_rate": 4.7022268455134646e-05,
"loss": 55.1307,
"step": 19300
},
{
"epoch": 0.43713384407390715,
"grad_norm": 540.3501586914062,
"learning_rate": 4.697042781654913e-05,
"loss": 58.1672,
"step": 19400
},
{
"epoch": 0.4393871113114015,
"grad_norm": 1146.242431640625,
"learning_rate": 4.69181688926877e-05,
"loss": 56.7247,
"step": 19500
},
{
"epoch": 0.4416403785488959,
"grad_norm": 761.1096801757812,
"learning_rate": 4.6865492678484895e-05,
"loss": 58.7413,
"step": 19600
},
{
"epoch": 0.4438936457863903,
"grad_norm": 840.3340454101562,
"learning_rate": 4.681240017681993e-05,
"loss": 53.7744,
"step": 19700
},
{
"epoch": 0.44614691302388465,
"grad_norm": 1110.4306640625,
"learning_rate": 4.6758892398497494e-05,
"loss": 55.2062,
"step": 19800
},
{
"epoch": 0.448400180261379,
"grad_norm": 1429.2696533203125,
"learning_rate": 4.670497036222856e-05,
"loss": 54.0191,
"step": 19900
},
{
"epoch": 0.45065344749887337,
"grad_norm": 1162.6312255859375,
"learning_rate": 4.665063509461097e-05,
"loss": 55.5365,
"step": 20000
},
{
"epoch": 0.45290671473636773,
"grad_norm": 1046.4312744140625,
"learning_rate": 4.659588763010989e-05,
"loss": 56.4798,
"step": 20100
},
{
"epoch": 0.4551599819738621,
"grad_norm": 1190.293212890625,
"learning_rate": 4.6540729011038146e-05,
"loss": 51.9015,
"step": 20200
},
{
"epoch": 0.45741324921135645,
"grad_norm": 1765.5267333984375,
"learning_rate": 4.648516028753632e-05,
"loss": 51.6989,
"step": 20300
},
{
"epoch": 0.4596665164488508,
"grad_norm": 717.52001953125,
"learning_rate": 4.642918251755281e-05,
"loss": 52.7842,
"step": 20400
},
{
"epoch": 0.4619197836863452,
"grad_norm": 621.5160522460938,
"learning_rate": 4.637279676682367e-05,
"loss": 57.9222,
"step": 20500
},
{
"epoch": 0.4641730509238396,
"grad_norm": 817.9072875976562,
"learning_rate": 4.6316004108852305e-05,
"loss": 56.0796,
"step": 20600
},
{
"epoch": 0.46642631816133395,
"grad_norm": 915.669921875,
"learning_rate": 4.6258805624889075e-05,
"loss": 56.7081,
"step": 20700
},
{
"epoch": 0.4686795853988283,
"grad_norm": 1008.6634521484375,
"learning_rate": 4.620120240391065e-05,
"loss": 59.5977,
"step": 20800
},
{
"epoch": 0.4709328526363227,
"grad_norm": 1886.614501953125,
"learning_rate": 4.614319554259934e-05,
"loss": 54.6364,
"step": 20900
},
{
"epoch": 0.47318611987381703,
"grad_norm": 1374.013671875,
"learning_rate": 4.608478614532215e-05,
"loss": 56.6813,
"step": 21000
},
{
"epoch": 0.4754393871113114,
"grad_norm": 615.3844604492188,
"learning_rate": 4.602597532410981e-05,
"loss": 53.3524,
"step": 21100
},
{
"epoch": 0.47769265434880576,
"grad_norm": 2033.61962890625,
"learning_rate": 4.5966764198635606e-05,
"loss": 53.9897,
"step": 21200
},
{
"epoch": 0.4799459215863001,
"grad_norm": 2901.83642578125,
"learning_rate": 4.5907153896193985e-05,
"loss": 51.6291,
"step": 21300
},
{
"epoch": 0.4821991888237945,
"grad_norm": 1315.8509521484375,
"learning_rate": 4.5847145551679206e-05,
"loss": 48.8819,
"step": 21400
},
{
"epoch": 0.4844524560612889,
"grad_norm": 1098.4552001953125,
"learning_rate": 4.5786740307563636e-05,
"loss": 49.5009,
"step": 21500
},
{
"epoch": 0.48670572329878325,
"grad_norm": 1847.3548583984375,
"learning_rate": 4.572593931387604e-05,
"loss": 53.9551,
"step": 21600
},
{
"epoch": 0.4889589905362776,
"grad_norm": 965.4677734375,
"learning_rate": 4.566474372817972e-05,
"loss": 52.8141,
"step": 21700
},
{
"epoch": 0.491212257773772,
"grad_norm": 801.56005859375,
"learning_rate": 4.5603154715550386e-05,
"loss": 56.9722,
"step": 21800
},
{
"epoch": 0.49346552501126634,
"grad_norm": 1328.810546875,
"learning_rate": 4.55411734485541e-05,
"loss": 58.1957,
"step": 21900
},
{
"epoch": 0.4957187922487607,
"grad_norm": 4401.68115234375,
"learning_rate": 4.54788011072248e-05,
"loss": 57.8721,
"step": 22000
},
{
"epoch": 0.49797205948625506,
"grad_norm": 3651.903564453125,
"learning_rate": 4.541603887904198e-05,
"loss": 53.8822,
"step": 22100
},
{
"epoch": 0.5002253267237494,
"grad_norm": 490.0090637207031,
"learning_rate": 4.535288795890798e-05,
"loss": 53.9883,
"step": 22200
},
{
"epoch": 0.5024785939612438,
"grad_norm": 492.57373046875,
"learning_rate": 4.528934954912531e-05,
"loss": 48.5514,
"step": 22300
},
{
"epoch": 0.5047318611987381,
"grad_norm": 377.37811279296875,
"learning_rate": 4.522542485937369e-05,
"loss": 59.0365,
"step": 22400
},
{
"epoch": 0.5069851284362326,
"grad_norm": 2599.677001953125,
"learning_rate": 4.516111510668707e-05,
"loss": 55.2056,
"step": 22500
},
{
"epoch": 0.5092383956737269,
"grad_norm": 646.6531372070312,
"learning_rate": 4.509642151543043e-05,
"loss": 55.0129,
"step": 22600
},
{
"epoch": 0.5114916629112213,
"grad_norm": 639.3536987304688,
"learning_rate": 4.503134531727652e-05,
"loss": 56.7601,
"step": 22700
},
{
"epoch": 0.5137449301487156,
"grad_norm": 991.4733276367188,
"learning_rate": 4.496588775118232e-05,
"loss": 54.7894,
"step": 22800
},
{
"epoch": 0.51599819738621,
"grad_norm": 577.932373046875,
"learning_rate": 4.490005006336555e-05,
"loss": 55.8032,
"step": 22900
},
{
"epoch": 0.5182514646237044,
"grad_norm": 973.028564453125,
"learning_rate": 4.4833833507280884e-05,
"loss": 55.5173,
"step": 23000
},
{
"epoch": 0.5205047318611987,
"grad_norm": 1144.3153076171875,
"learning_rate": 4.476723934359609e-05,
"loss": 54.9895,
"step": 23100
},
{
"epoch": 0.5227579990986931,
"grad_norm": 1019.5435180664062,
"learning_rate": 4.4700268840168045e-05,
"loss": 52.9062,
"step": 23200
},
{
"epoch": 0.5250112663361874,
"grad_norm": 874.452880859375,
"learning_rate": 4.463292327201862e-05,
"loss": 52.9759,
"step": 23300
},
{
"epoch": 0.5272645335736819,
"grad_norm": 578.9571533203125,
"learning_rate": 4.456520392131035e-05,
"loss": 49.5633,
"step": 23400
},
{
"epoch": 0.5295178008111762,
"grad_norm": 1387.6461181640625,
"learning_rate": 4.4497112077322044e-05,
"loss": 53.8476,
"step": 23500
},
{
"epoch": 0.5317710680486706,
"grad_norm": 5335.48681640625,
"learning_rate": 4.442864903642428e-05,
"loss": 54.16,
"step": 23600
},
{
"epoch": 0.5340243352861649,
"grad_norm": 1112.384033203125,
"learning_rate": 4.435981610205464e-05,
"loss": 53.531,
"step": 23700
},
{
"epoch": 0.5362776025236593,
"grad_norm": 1009.0767822265625,
"learning_rate": 4.4290614584693004e-05,
"loss": 52.0493,
"step": 23800
},
{
"epoch": 0.5385308697611537,
"grad_norm": 395.3855285644531,
"learning_rate": 4.4221045801836494e-05,
"loss": 49.3474,
"step": 23900
},
{
"epoch": 0.540784136998648,
"grad_norm": 785.1640014648438,
"learning_rate": 4.415111107797445e-05,
"loss": 54.9418,
"step": 24000
},
{
"epoch": 0.5430374042361424,
"grad_norm": 650.4179077148438,
"learning_rate": 4.408081174456322e-05,
"loss": 53.81,
"step": 24100
},
{
"epoch": 0.5452906714736367,
"grad_norm": 1124.389892578125,
"learning_rate": 4.401014914000078e-05,
"loss": 53.7556,
"step": 24200
},
{
"epoch": 0.5475439387111312,
"grad_norm": 891.2913818359375,
"learning_rate": 4.393912460960124e-05,
"loss": 49.5907,
"step": 24300
},
{
"epoch": 0.5497972059486255,
"grad_norm": 1058.59228515625,
"learning_rate": 4.386773950556931e-05,
"loss": 55.9397,
"step": 24400
},
{
"epoch": 0.5520504731861199,
"grad_norm": 1821.5123291015625,
"learning_rate": 4.379599518697444e-05,
"loss": 53.1642,
"step": 24500
},
{
"epoch": 0.5543037404236142,
"grad_norm": 3784.1845703125,
"learning_rate": 4.372389301972506e-05,
"loss": 54.2371,
"step": 24600
},
{
"epoch": 0.5565570076611086,
"grad_norm": 1150.9434814453125,
"learning_rate": 4.3651434376542486e-05,
"loss": 53.3396,
"step": 24700
},
{
"epoch": 0.558810274898603,
"grad_norm": 0.0,
"learning_rate": 4.357862063693486e-05,
"loss": 47.3,
"step": 24800
},
{
"epoch": 0.5610635421360973,
"grad_norm": 7060.201171875,
"learning_rate": 4.3505453187170805e-05,
"loss": 54.3783,
"step": 24900
},
{
"epoch": 0.5633168093735917,
"grad_norm": 751.5445556640625,
"learning_rate": 4.34319334202531e-05,
"loss": 57.37,
"step": 25000
},
{
"epoch": 0.565570076611086,
"grad_norm": 378.6798095703125,
"learning_rate": 4.335806273589214e-05,
"loss": 49.7573,
"step": 25100
},
{
"epoch": 0.5678233438485805,
"grad_norm": 669.6986083984375,
"learning_rate": 4.3283842540479264e-05,
"loss": 50.9783,
"step": 25200
},
{
"epoch": 0.5700766110860748,
"grad_norm": 737.0020751953125,
"learning_rate": 4.3209274247060004e-05,
"loss": 52.4549,
"step": 25300
},
{
"epoch": 0.5723298783235692,
"grad_norm": 626.1477661132812,
"learning_rate": 4.313435927530719e-05,
"loss": 54.6302,
"step": 25400
},
{
"epoch": 0.5745831455610635,
"grad_norm": 866.2355346679688,
"learning_rate": 4.305909905149389e-05,
"loss": 53.9909,
"step": 25500
},
{
"epoch": 0.5768364127985579,
"grad_norm": 327.4191589355469,
"learning_rate": 4.2983495008466276e-05,
"loss": 53.452,
"step": 25600
},
{
"epoch": 0.5790896800360523,
"grad_norm": 0.0,
"learning_rate": 4.290754858561637e-05,
"loss": 51.8147,
"step": 25700
},
{
"epoch": 0.5813429472735466,
"grad_norm": 1892.76416015625,
"learning_rate": 4.2831261228854544e-05,
"loss": 52.1455,
"step": 25800
},
{
"epoch": 0.583596214511041,
"grad_norm": 536.7318115234375,
"learning_rate": 4.275463439058214e-05,
"loss": 53.9379,
"step": 25900
},
{
"epoch": 0.5858494817485354,
"grad_norm": 750.3458251953125,
"learning_rate": 4.267766952966369e-05,
"loss": 47.4762,
"step": 26000
},
{
"epoch": 0.5881027489860298,
"grad_norm": 730.2880249023438,
"learning_rate": 4.260036811139921e-05,
"loss": 54.3805,
"step": 26100
},
{
"epoch": 0.5903560162235241,
"grad_norm": 600.27392578125,
"learning_rate": 4.2522731607496275e-05,
"loss": 56.3112,
"step": 26200
},
{
"epoch": 0.5926092834610185,
"grad_norm": 919.752197265625,
"learning_rate": 4.244476149604201e-05,
"loss": 48.9764,
"step": 26300
},
{
"epoch": 0.5948625506985128,
"grad_norm": 3348.833251953125,
"learning_rate": 4.2366459261474933e-05,
"loss": 48.8116,
"step": 26400
},
{
"epoch": 0.5971158179360072,
"grad_norm": 438.4606628417969,
"learning_rate": 4.228782639455674e-05,
"loss": 56.4488,
"step": 26500
},
{
"epoch": 0.5993690851735016,
"grad_norm": 654.3180541992188,
"learning_rate": 4.220886439234385e-05,
"loss": 48.5728,
"step": 26600
},
{
"epoch": 0.6016223524109959,
"grad_norm": 1312.3807373046875,
"learning_rate": 4.212957475815898e-05,
"loss": 51.3269,
"step": 26700
},
{
"epoch": 0.6038756196484903,
"grad_norm": 869.6898803710938,
"learning_rate": 4.2049959001562464e-05,
"loss": 54.4109,
"step": 26800
},
{
"epoch": 0.6061288868859847,
"grad_norm": 379.0628662109375,
"learning_rate": 4.197001863832355e-05,
"loss": 53.1673,
"step": 26900
},
{
"epoch": 0.6083821541234791,
"grad_norm": 569.8794555664062,
"learning_rate": 4.188975519039151e-05,
"loss": 48.3691,
"step": 27000
},
{
"epoch": 0.6106354213609734,
"grad_norm": 883.134765625,
"learning_rate": 4.18091701858667e-05,
"loss": 47.1612,
"step": 27100
},
{
"epoch": 0.6128886885984678,
"grad_norm": 1318.8486328125,
"learning_rate": 4.172826515897146e-05,
"loss": 51.9336,
"step": 27200
},
{
"epoch": 0.6151419558359621,
"grad_norm": 777.8239135742188,
"learning_rate": 4.164704165002086e-05,
"loss": 50.7255,
"step": 27300
},
{
"epoch": 0.6173952230734565,
"grad_norm": 704.6067504882812,
"learning_rate": 4.1565501205393445e-05,
"loss": 51.1446,
"step": 27400
},
{
"epoch": 0.6196484903109509,
"grad_norm": 649.2020263671875,
"learning_rate": 4.148364537750172e-05,
"loss": 53.8287,
"step": 27500
},
{
"epoch": 0.6219017575484452,
"grad_norm": 3303.88720703125,
"learning_rate": 4.140147572476268e-05,
"loss": 52.5814,
"step": 27600
},
{
"epoch": 0.6241550247859396,
"grad_norm": 750.2407836914062,
"learning_rate": 4.131899381156806e-05,
"loss": 52.0253,
"step": 27700
},
{
"epoch": 0.626408292023434,
"grad_norm": 862.5943603515625,
"learning_rate": 4.123620120825459e-05,
"loss": 54.8922,
"step": 27800
},
{
"epoch": 0.6286615592609284,
"grad_norm": 1933.6412353515625,
"learning_rate": 4.11530994910741e-05,
"loss": 50.71,
"step": 27900
},
{
"epoch": 0.6309148264984227,
"grad_norm": 833.4996337890625,
"learning_rate": 4.1069690242163484e-05,
"loss": 50.5416,
"step": 28000
},
{
"epoch": 0.6331680937359171,
"grad_norm": 737.939697265625,
"learning_rate": 4.098597504951462e-05,
"loss": 52.4712,
"step": 28100
},
{
"epoch": 0.6354213609734114,
"grad_norm": 608.831298828125,
"learning_rate": 4.09019555069441e-05,
"loss": 49.1117,
"step": 28200
},
{
"epoch": 0.6376746282109058,
"grad_norm": 450.9925231933594,
"learning_rate": 4.081763321406291e-05,
"loss": 50.1539,
"step": 28300
},
{
"epoch": 0.6399278954484002,
"grad_norm": 1044.352294921875,
"learning_rate": 4.073300977624594e-05,
"loss": 50.8113,
"step": 28400
},
{
"epoch": 0.6421811626858945,
"grad_norm": 847.0576782226562,
"learning_rate": 4.064808680460148e-05,
"loss": 53.6078,
"step": 28500
},
{
"epoch": 0.644434429923389,
"grad_norm": 1176.281005859375,
"learning_rate": 4.0562865915940496e-05,
"loss": 50.8178,
"step": 28600
},
{
"epoch": 0.6466876971608833,
"grad_norm": 1171.6334228515625,
"learning_rate": 4.047734873274586e-05,
"loss": 52.1101,
"step": 28700
},
{
"epoch": 0.6489409643983777,
"grad_norm": 2008.2913818359375,
"learning_rate": 4.039153688314145e-05,
"loss": 51.2683,
"step": 28800
},
{
"epoch": 0.651194231635872,
"grad_norm": 946.8640747070312,
"learning_rate": 4.030543200086123e-05,
"loss": 50.7668,
"step": 28900
},
{
"epoch": 0.6534474988733664,
"grad_norm": 718.6983032226562,
"learning_rate": 4.021903572521802e-05,
"loss": 46.3652,
"step": 29000
},
{
"epoch": 0.6557007661108607,
"grad_norm": 811.4840087890625,
"learning_rate": 4.013234970107236e-05,
"loss": 45.9392,
"step": 29100
},
{
"epoch": 0.6579540333483551,
"grad_norm": 2850.653076171875,
"learning_rate": 4.0045375578801214e-05,
"loss": 45.6765,
"step": 29200
},
{
"epoch": 0.6602073005858495,
"grad_norm": 1648.90087890625,
"learning_rate": 3.995811501426648e-05,
"loss": 55.2959,
"step": 29300
},
{
"epoch": 0.6624605678233438,
"grad_norm": 926.3929443359375,
"learning_rate": 3.9870569668783536e-05,
"loss": 47.1888,
"step": 29400
},
{
"epoch": 0.6647138350608383,
"grad_norm": 1050.4713134765625,
"learning_rate": 3.978274120908956e-05,
"loss": 46.4457,
"step": 29500
},
{
"epoch": 0.6669671022983326,
"grad_norm": 709.8076171875,
"learning_rate": 3.969463130731183e-05,
"loss": 59.8226,
"step": 29600
},
{
"epoch": 0.669220369535827,
"grad_norm": 2119.108154296875,
"learning_rate": 3.9606241640935864e-05,
"loss": 51.6369,
"step": 29700
},
{
"epoch": 0.6714736367733213,
"grad_norm": 861.7589111328125,
"learning_rate": 3.9517573892773494e-05,
"loss": 50.3213,
"step": 29800
},
{
"epoch": 0.6737269040108157,
"grad_norm": 858.5698852539062,
"learning_rate": 3.942862975093085e-05,
"loss": 53.4681,
"step": 29900
},
{
"epoch": 0.67598017124831,
"grad_norm": 1666.9454345703125,
"learning_rate": 3.933941090877615e-05,
"loss": 49.9129,
"step": 30000
},
{
"epoch": 0.6782334384858044,
"grad_norm": 766.8621826171875,
"learning_rate": 3.924991906490758e-05,
"loss": 50.3255,
"step": 30100
},
{
"epoch": 0.6804867057232988,
"grad_norm": 415.349853515625,
"learning_rate": 3.916015592312082e-05,
"loss": 44.5914,
"step": 30200
},
{
"epoch": 0.6827399729607931,
"grad_norm": 939.04443359375,
"learning_rate": 3.907012319237672e-05,
"loss": 48.9593,
"step": 30300
},
{
"epoch": 0.6849932401982876,
"grad_norm": 771.4345703125,
"learning_rate": 3.897982258676867e-05,
"loss": 49.2372,
"step": 30400
},
{
"epoch": 0.6872465074357819,
"grad_norm": 932.6231079101562,
"learning_rate": 3.888925582549006e-05,
"loss": 51.741,
"step": 30500
},
{
"epoch": 0.6894997746732763,
"grad_norm": 637.57470703125,
"learning_rate": 3.879842463280145e-05,
"loss": 52.6971,
"step": 30600
},
{
"epoch": 0.6917530419107706,
"grad_norm": 734.0154418945312,
"learning_rate": 3.870733073799785e-05,
"loss": 50.6578,
"step": 30700
},
{
"epoch": 0.694006309148265,
"grad_norm": 683.8228759765625,
"learning_rate": 3.861597587537568e-05,
"loss": 49.1606,
"step": 30800
},
{
"epoch": 0.6962595763857593,
"grad_norm": 414.70159912109375,
"learning_rate": 3.8524361784199853e-05,
"loss": 46.3476,
"step": 30900
},
{
"epoch": 0.6985128436232537,
"grad_norm": 1327.72265625,
"learning_rate": 3.84324902086706e-05,
"loss": 48.834,
"step": 31000
},
{
"epoch": 0.7007661108607481,
"grad_norm": 370.52392578125,
"learning_rate": 3.834036289789029e-05,
"loss": 46.4221,
"step": 31100
},
{
"epoch": 0.7030193780982424,
"grad_norm": 7749.1181640625,
"learning_rate": 3.824798160583012e-05,
"loss": 49.2808,
"step": 31200
},
{
"epoch": 0.7052726453357369,
"grad_norm": 1081.3465576171875,
"learning_rate": 3.8155348091296736e-05,
"loss": 48.1425,
"step": 31300
},
{
"epoch": 0.7075259125732312,
"grad_norm": 630.7388305664062,
"learning_rate": 3.8062464117898724e-05,
"loss": 50.6762,
"step": 31400
},
{
"epoch": 0.7097791798107256,
"grad_norm": 2894.5537109375,
"learning_rate": 3.796933145401304e-05,
"loss": 52.1551,
"step": 31500
},
{
"epoch": 0.7120324470482199,
"grad_norm": 3425.33642578125,
"learning_rate": 3.787595187275136e-05,
"loss": 45.0564,
"step": 31600
},
{
"epoch": 0.7142857142857143,
"grad_norm": 1268.707275390625,
"learning_rate": 3.77823271519263e-05,
"loss": 48.459,
"step": 31700
},
{
"epoch": 0.7165389815232086,
"grad_norm": 1132.0858154296875,
"learning_rate": 3.7688459074017606e-05,
"loss": 46.2624,
"step": 31800
},
{
"epoch": 0.718792248760703,
"grad_norm": 896.9110717773438,
"learning_rate": 3.759434942613816e-05,
"loss": 47.7103,
"step": 31900
},
{
"epoch": 0.7210455159981974,
"grad_norm": 1195.548095703125,
"learning_rate": 3.7500000000000003e-05,
"loss": 51.0907,
"step": 32000
},
{
"epoch": 0.7232987832356917,
"grad_norm": 2168.93017578125,
"learning_rate": 3.7405412591880215e-05,
"loss": 49.1653,
"step": 32100
},
{
"epoch": 0.7255520504731862,
"grad_norm": 1866.5477294921875,
"learning_rate": 3.731058900258668e-05,
"loss": 49.1327,
"step": 32200
},
{
"epoch": 0.7278053177106805,
"grad_norm": 624.8327026367188,
"learning_rate": 3.721553103742388e-05,
"loss": 50.1689,
"step": 32300
},
{
"epoch": 0.7300585849481749,
"grad_norm": 974.498779296875,
"learning_rate": 3.712024050615843e-05,
"loss": 50.1442,
"step": 32400
},
{
"epoch": 0.7323118521856692,
"grad_norm": 0.0,
"learning_rate": 3.702471922298469e-05,
"loss": 49.697,
"step": 32500
},
{
"epoch": 0.7345651194231636,
"grad_norm": 0.0,
"learning_rate": 3.692896900649021e-05,
"loss": 46.7989,
"step": 32600
},
{
"epoch": 0.7368183866606579,
"grad_norm": 0.0,
"learning_rate": 3.6832991679621086e-05,
"loss": 46.8958,
"step": 32700
},
{
"epoch": 0.7390716538981523,
"grad_norm": 592.7318115234375,
"learning_rate": 3.673678906964727e-05,
"loss": 46.7422,
"step": 32800
},
{
"epoch": 0.7413249211356467,
"grad_norm": 581.4388427734375,
"learning_rate": 3.6640363008127784e-05,
"loss": 47.2569,
"step": 32900
},
{
"epoch": 0.743578188373141,
"grad_norm": 462.7564392089844,
"learning_rate": 3.654371533087586e-05,
"loss": 47.8859,
"step": 33000
},
{
"epoch": 0.7458314556106355,
"grad_norm": 721.4935302734375,
"learning_rate": 3.644684787792392e-05,
"loss": 48.0441,
"step": 33100
},
{
"epoch": 0.7480847228481298,
"grad_norm": 574.1445922851562,
"learning_rate": 3.634976249348867e-05,
"loss": 49.1217,
"step": 33200
},
{
"epoch": 0.7503379900856242,
"grad_norm": 552.1611938476562,
"learning_rate": 3.625246102593588e-05,
"loss": 46.7031,
"step": 33300
},
{
"epoch": 0.7525912573231185,
"grad_norm": 442.0963439941406,
"learning_rate": 3.615494532774522e-05,
"loss": 47.0946,
"step": 33400
},
{
"epoch": 0.7548445245606129,
"grad_norm": 896.58837890625,
"learning_rate": 3.6057217255475034e-05,
"loss": 45.341,
"step": 33500
},
{
"epoch": 0.7570977917981072,
"grad_norm": 784.356201171875,
"learning_rate": 3.5959278669726935e-05,
"loss": 51.6972,
"step": 33600
},
{
"epoch": 0.7593510590356016,
"grad_norm": 1248.8477783203125,
"learning_rate": 3.586113143511043e-05,
"loss": 48.825,
"step": 33700
},
{
"epoch": 0.761604326273096,
"grad_norm": 875.2767333984375,
"learning_rate": 3.576277742020738e-05,
"loss": 52.262,
"step": 33800
},
{
"epoch": 0.7638575935105903,
"grad_norm": 596.1762084960938,
"learning_rate": 3.566421849753646e-05,
"loss": 52.5734,
"step": 33900
},
{
"epoch": 0.7661108607480848,
"grad_norm": 1226.5533447265625,
"learning_rate": 3.556545654351749e-05,
"loss": 48.9049,
"step": 34000
},
{
"epoch": 0.7683641279855791,
"grad_norm": 863.8980102539062,
"learning_rate": 3.54664934384357e-05,
"loss": 44.9694,
"step": 34100
},
{
"epoch": 0.7706173952230735,
"grad_norm": 1017.0120849609375,
"learning_rate": 3.536733106640598e-05,
"loss": 52.3725,
"step": 34200
},
{
"epoch": 0.7728706624605678,
"grad_norm": 572.596435546875,
"learning_rate": 3.526797131533693e-05,
"loss": 47.3554,
"step": 34300
},
{
"epoch": 0.7751239296980622,
"grad_norm": 397.4549560546875,
"learning_rate": 3.516841607689501e-05,
"loss": 54.4889,
"step": 34400
},
{
"epoch": 0.7773771969355565,
"grad_norm": 1202.6942138671875,
"learning_rate": 3.5068667246468436e-05,
"loss": 49.3039,
"step": 34500
},
{
"epoch": 0.7796304641730509,
"grad_norm": 989.998046875,
"learning_rate": 3.496872672313116e-05,
"loss": 42.5372,
"step": 34600
},
{
"epoch": 0.7818837314105452,
"grad_norm": 1453.26416015625,
"learning_rate": 3.486859640960668e-05,
"loss": 49.7227,
"step": 34700
},
{
"epoch": 0.7841369986480397,
"grad_norm": 416.6725769042969,
"learning_rate": 3.476827821223184e-05,
"loss": 46.2663,
"step": 34800
},
{
"epoch": 0.7863902658855341,
"grad_norm": 889.0680541992188,
"learning_rate": 3.466777404092052e-05,
"loss": 46.2791,
"step": 34900
},
{
"epoch": 0.7886435331230284,
"grad_norm": 573.9443969726562,
"learning_rate": 3.456708580912725e-05,
"loss": 45.8314,
"step": 35000
},
{
"epoch": 0.7908968003605228,
"grad_norm": 747.3670654296875,
"learning_rate": 3.446621543381083e-05,
"loss": 43.1904,
"step": 35100
},
{
"epoch": 0.7931500675980171,
"grad_norm": 884.3372192382812,
"learning_rate": 3.436516483539781e-05,
"loss": 48.3382,
"step": 35200
},
{
"epoch": 0.7954033348355115,
"grad_norm": 690.1305541992188,
"learning_rate": 3.426393593774591e-05,
"loss": 46.8459,
"step": 35300
},
{
"epoch": 0.7976566020730058,
"grad_norm": 3796.224365234375,
"learning_rate": 3.4162530668107434e-05,
"loss": 47.1848,
"step": 35400
},
{
"epoch": 0.7999098693105002,
"grad_norm": 765.8935546875,
"learning_rate": 3.406095095709254e-05,
"loss": 47.8368,
"step": 35500
},
{
"epoch": 0.8021631365479945,
"grad_norm": 1086.65625,
"learning_rate": 3.39591987386325e-05,
"loss": 51.3484,
"step": 35600
},
{
"epoch": 0.804416403785489,
"grad_norm": 507.65338134765625,
"learning_rate": 3.3857275949942893e-05,
"loss": 43.1595,
"step": 35700
},
{
"epoch": 0.8066696710229834,
"grad_norm": 5600.59521484375,
"learning_rate": 3.375518453148669e-05,
"loss": 47.4594,
"step": 35800
},
{
"epoch": 0.8089229382604777,
"grad_norm": 2263.270263671875,
"learning_rate": 3.365292642693732e-05,
"loss": 42.8212,
"step": 35900
},
{
"epoch": 0.8111762054979721,
"grad_norm": 761.8606567382812,
"learning_rate": 3.355050358314172e-05,
"loss": 44.9991,
"step": 36000
},
{
"epoch": 0.8134294727354664,
"grad_norm": 1336.81787109375,
"learning_rate": 3.344791795008318e-05,
"loss": 46.3603,
"step": 36100
},
{
"epoch": 0.8156827399729608,
"grad_norm": 726.6700439453125,
"learning_rate": 3.3345171480844275e-05,
"loss": 48.5755,
"step": 36200
},
{
"epoch": 0.8179360072104551,
"grad_norm": 0.0,
"learning_rate": 3.324226613156968e-05,
"loss": 47.3854,
"step": 36300
},
{
"epoch": 0.8201892744479495,
"grad_norm": 441.3335266113281,
"learning_rate": 3.313920386142892e-05,
"loss": 47.508,
"step": 36400
},
{
"epoch": 0.8224425416854438,
"grad_norm": 665.80322265625,
"learning_rate": 3.303598663257904e-05,
"loss": 49.6352,
"step": 36500
},
{
"epoch": 0.8246958089229383,
"grad_norm": 1343.923095703125,
"learning_rate": 3.293261641012731e-05,
"loss": 47.085,
"step": 36600
},
{
"epoch": 0.8269490761604327,
"grad_norm": 524.6836547851562,
"learning_rate": 3.2829095162093734e-05,
"loss": 47.4216,
"step": 36700
},
{
"epoch": 0.829202343397927,
"grad_norm": 1741.5296630859375,
"learning_rate": 3.272542485937369e-05,
"loss": 48.4512,
"step": 36800
},
{
"epoch": 0.8314556106354214,
"grad_norm": 507.85040283203125,
"learning_rate": 3.2621607475700275e-05,
"loss": 45.0441,
"step": 36900
},
{
"epoch": 0.8337088778729157,
"grad_norm": 694.7333374023438,
"learning_rate": 3.251764498760683e-05,
"loss": 45.3751,
"step": 37000
},
{
"epoch": 0.8359621451104101,
"grad_norm": 0.0,
"learning_rate": 3.241353937438927e-05,
"loss": 47.7383,
"step": 37100
},
{
"epoch": 0.8382154123479044,
"grad_norm": 535.8335571289062,
"learning_rate": 3.230929261806842e-05,
"loss": 50.2691,
"step": 37200
},
{
"epoch": 0.8404686795853988,
"grad_norm": 533.4066772460938,
"learning_rate": 3.2204906703352236e-05,
"loss": 48.8035,
"step": 37300
},
{
"epoch": 0.8427219468228931,
"grad_norm": 0.0,
"learning_rate": 3.210038361759807e-05,
"loss": 39.8506,
"step": 37400
},
{
"epoch": 0.8449752140603876,
"grad_norm": 2554.993408203125,
"learning_rate": 3.1995725350774806e-05,
"loss": 44.7332,
"step": 37500
},
{
"epoch": 0.847228481297882,
"grad_norm": 1027.2034912109375,
"learning_rate": 3.1890933895424976e-05,
"loss": 46.4409,
"step": 37600
},
{
"epoch": 0.8494817485353763,
"grad_norm": 1057.863037109375,
"learning_rate": 3.178601124662686e-05,
"loss": 44.3963,
"step": 37700
},
{
"epoch": 0.8517350157728707,
"grad_norm": 418.9128112792969,
"learning_rate": 3.168095940195642e-05,
"loss": 48.5565,
"step": 37800
},
{
"epoch": 0.853988283010365,
"grad_norm": 894.1998291015625,
"learning_rate": 3.157578036144937e-05,
"loss": 45.6854,
"step": 37900
},
{
"epoch": 0.8562415502478594,
"grad_norm": 2467.48486328125,
"learning_rate": 3.147047612756302e-05,
"loss": 45.3885,
"step": 38000
},
{
"epoch": 0.8584948174853537,
"grad_norm": 807.8389282226562,
"learning_rate": 3.136504870513819e-05,
"loss": 46.2842,
"step": 38100
},
{
"epoch": 0.8607480847228481,
"grad_norm": 1878.1251220703125,
"learning_rate": 3.125950010136104e-05,
"loss": 45.9782,
"step": 38200
},
{
"epoch": 0.8630013519603424,
"grad_norm": 1414.4879150390625,
"learning_rate": 3.115383232572483e-05,
"loss": 46.0757,
"step": 38300
},
{
"epoch": 0.8652546191978369,
"grad_norm": 1302.6700439453125,
"learning_rate": 3.104804738999169e-05,
"loss": 48.1511,
"step": 38400
},
{
"epoch": 0.8675078864353313,
"grad_norm": 810.4852905273438,
"learning_rate": 3.094214730815433e-05,
"loss": 42.2249,
"step": 38500
},
{
"epoch": 0.8697611536728256,
"grad_norm": 1316.186279296875,
"learning_rate": 3.083613409639764e-05,
"loss": 45.1847,
"step": 38600
},
{
"epoch": 0.87201442091032,
"grad_norm": 0.0,
"learning_rate": 3.073000977306036e-05,
"loss": 40.2777,
"step": 38700
},
{
"epoch": 0.8742676881478143,
"grad_norm": 574.6009521484375,
"learning_rate": 3.062377635859663e-05,
"loss": 44.6483,
"step": 38800
},
{
"epoch": 0.8765209553853087,
"grad_norm": 723.0553588867188,
"learning_rate": 3.0517435875537536e-05,
"loss": 46.7895,
"step": 38900
},
{
"epoch": 0.878774222622803,
"grad_norm": 626.0262451171875,
"learning_rate": 3.0410990348452573e-05,
"loss": 42.9903,
"step": 39000
},
{
"epoch": 0.8810274898602974,
"grad_norm": 384.9597473144531,
"learning_rate": 3.030444180391116e-05,
"loss": 52.3541,
"step": 39100
},
{
"epoch": 0.8832807570977917,
"grad_norm": 726.4917602539062,
"learning_rate": 3.0197792270443982e-05,
"loss": 43.4684,
"step": 39200
},
{
"epoch": 0.8855340243352862,
"grad_norm": 783.1016235351562,
"learning_rate": 3.0091043778504436e-05,
"loss": 48.3162,
"step": 39300
},
{
"epoch": 0.8877872915727806,
"grad_norm": 709.700439453125,
"learning_rate": 2.9984198360429932e-05,
"loss": 45.9866,
"step": 39400
},
{
"epoch": 0.8900405588102749,
"grad_norm": 766.4548950195312,
"learning_rate": 2.9877258050403212e-05,
"loss": 43.3167,
"step": 39500
},
{
"epoch": 0.8922938260477693,
"grad_norm": 547.8275756835938,
"learning_rate": 2.9770224884413623e-05,
"loss": 45.1045,
"step": 39600
},
{
"epoch": 0.8945470932852636,
"grad_norm": 1067.27197265625,
"learning_rate": 2.966310090021837e-05,
"loss": 45.1934,
"step": 39700
},
{
"epoch": 0.896800360522758,
"grad_norm": 906.0169677734375,
"learning_rate": 2.9555888137303695e-05,
"loss": 46.1919,
"step": 39800
},
{
"epoch": 0.8990536277602523,
"grad_norm": 693.871826171875,
"learning_rate": 2.9448588636846046e-05,
"loss": 42.2423,
"step": 39900
},
{
"epoch": 0.9013068949977467,
"grad_norm": 697.4348754882812,
"learning_rate": 2.9341204441673266e-05,
"loss": 46.1574,
"step": 40000
},
{
"epoch": 0.903560162235241,
"grad_norm": 896.8306274414062,
"learning_rate": 2.9233737596225613e-05,
"loss": 46.1825,
"step": 40100
},
{
"epoch": 0.9058134294727355,
"grad_norm": 0.0,
"learning_rate": 2.9126190146516942e-05,
"loss": 51.3986,
"step": 40200
},
{
"epoch": 0.9080666967102299,
"grad_norm": 228.38462829589844,
"learning_rate": 2.9018564140095657e-05,
"loss": 43.3596,
"step": 40300
},
{
"epoch": 0.9103199639477242,
"grad_norm": 1101.621826171875,
"learning_rate": 2.8910861626005776e-05,
"loss": 44.0146,
"step": 40400
},
{
"epoch": 0.9125732311852186,
"grad_norm": 913.1014404296875,
"learning_rate": 2.8803084654747918e-05,
"loss": 41.7263,
"step": 40500
},
{
"epoch": 0.9148264984227129,
"grad_norm": 499.08642578125,
"learning_rate": 2.8695235278240272e-05,
"loss": 47.2988,
"step": 40600
},
{
"epoch": 0.9170797656602073,
"grad_norm": 513.2366333007812,
"learning_rate": 2.858731554977948e-05,
"loss": 42.2196,
"step": 40700
},
{
"epoch": 0.9193330328977016,
"grad_norm": 876.3751220703125,
"learning_rate": 2.8479327524001636e-05,
"loss": 43.9035,
"step": 40800
},
{
"epoch": 0.921586300135196,
"grad_norm": 698.6486206054688,
"learning_rate": 2.837127325684308e-05,
"loss": 49.8259,
"step": 40900
},
{
"epoch": 0.9238395673726904,
"grad_norm": 522.728271484375,
"learning_rate": 2.8263154805501297e-05,
"loss": 40.6793,
"step": 41000
},
{
"epoch": 0.9260928346101848,
"grad_norm": 13431.1171875,
"learning_rate": 2.815497422839575e-05,
"loss": 47.0175,
"step": 41100
},
{
"epoch": 0.9283461018476792,
"grad_norm": 406.3808898925781,
"learning_rate": 2.8046733585128687e-05,
"loss": 49.5201,
"step": 41200
},
{
"epoch": 0.9305993690851735,
"grad_norm": 528.2371826171875,
"learning_rate": 2.7938434936445945e-05,
"loss": 46.2839,
"step": 41300
},
{
"epoch": 0.9328526363226679,
"grad_norm": 815.2286376953125,
"learning_rate": 2.7830080344197674e-05,
"loss": 43.2044,
"step": 41400
},
{
"epoch": 0.9351059035601622,
"grad_norm": 547.2005004882812,
"learning_rate": 2.7721671871299116e-05,
"loss": 44.5253,
"step": 41500
},
{
"epoch": 0.9373591707976566,
"grad_norm": 722.04931640625,
"learning_rate": 2.761321158169134e-05,
"loss": 41.0613,
"step": 41600
},
{
"epoch": 0.9396124380351509,
"grad_norm": 583.0802612304688,
"learning_rate": 2.7504701540301907e-05,
"loss": 48.6149,
"step": 41700
},
{
"epoch": 0.9418657052726453,
"grad_norm": 616.2501831054688,
"learning_rate": 2.7396143813005602e-05,
"loss": 45.1471,
"step": 41800
},
{
"epoch": 0.9441189725101397,
"grad_norm": 680.4635620117188,
"learning_rate": 2.7287540466585065e-05,
"loss": 48.1059,
"step": 41900
},
{
"epoch": 0.9463722397476341,
"grad_norm": 526.566162109375,
"learning_rate": 2.717889356869146e-05,
"loss": 41.3846,
"step": 42000
},
{
"epoch": 0.9486255069851285,
"grad_norm": 518.4340209960938,
"learning_rate": 2.7070205187805108e-05,
"loss": 48.1914,
"step": 42100
},
{
"epoch": 0.9508787742226228,
"grad_norm": 3767.502197265625,
"learning_rate": 2.6961477393196126e-05,
"loss": 46.3911,
"step": 42200
},
{
"epoch": 0.9531320414601172,
"grad_norm": 0.0,
"learning_rate": 2.6852712254884988e-05,
"loss": 45.1576,
"step": 42300
},
{
"epoch": 0.9553853086976115,
"grad_norm": 724.995849609375,
"learning_rate": 2.674391184360313e-05,
"loss": 43.0639,
"step": 42400
},
{
"epoch": 0.9576385759351059,
"grad_norm": 635.5662231445312,
"learning_rate": 2.663507823075358e-05,
"loss": 42.7185,
"step": 42500
},
{
"epoch": 0.9598918431726002,
"grad_norm": 397.24517822265625,
"learning_rate": 2.6526213488371427e-05,
"loss": 42.7067,
"step": 42600
},
{
"epoch": 0.9621451104100947,
"grad_norm": 1213.5648193359375,
"learning_rate": 2.641731968908444e-05,
"loss": 45.1527,
"step": 42700
},
{
"epoch": 0.964398377647589,
"grad_norm": 1316.8245849609375,
"learning_rate": 2.63083989060736e-05,
"loss": 45.5445,
"step": 42800
},
{
"epoch": 0.9666516448850834,
"grad_norm": 1318.21875,
"learning_rate": 2.6199453213033598e-05,
"loss": 44.2066,
"step": 42900
},
{
"epoch": 0.9689049121225778,
"grad_norm": 890.9985961914062,
"learning_rate": 2.6090484684133404e-05,
"loss": 41.8035,
"step": 43000
},
{
"epoch": 0.9711581793600721,
"grad_norm": 480.1272888183594,
"learning_rate": 2.598149539397672e-05,
"loss": 45.4357,
"step": 43100
},
{
"epoch": 0.9734114465975665,
"grad_norm": 566.5352172851562,
"learning_rate": 2.587248741756253e-05,
"loss": 47.8986,
"step": 43200
},
{
"epoch": 0.9756647138350608,
"grad_norm": 2252.18017578125,
"learning_rate": 2.5763462830245572e-05,
"loss": 45.0348,
"step": 43300
},
{
"epoch": 0.9779179810725552,
"grad_norm": 0.0,
"learning_rate": 2.5654423707696833e-05,
"loss": 46.8718,
"step": 43400
},
{
"epoch": 0.9801712483100495,
"grad_norm": 585.69775390625,
"learning_rate": 2.5545372125864032e-05,
"loss": 43.0343,
"step": 43500
},
{
"epoch": 0.982424515547544,
"grad_norm": 782.6468505859375,
"learning_rate": 2.5436310160932092e-05,
"loss": 44.3432,
"step": 43600
},
{
"epoch": 0.9846777827850383,
"grad_norm": 369.2841796875,
"learning_rate": 2.5327239889283612e-05,
"loss": 41.9938,
"step": 43700
},
{
"epoch": 0.9869310500225327,
"grad_norm": 836.7174072265625,
"learning_rate": 2.521816338745935e-05,
"loss": 44.0726,
"step": 43800
},
{
"epoch": 0.9891843172600271,
"grad_norm": 738.8544921875,
"learning_rate": 2.5109082732118665e-05,
"loss": 45.6463,
"step": 43900
},
{
"epoch": 0.9914375844975214,
"grad_norm": 912.4453735351562,
"learning_rate": 2.5e-05,
"loss": 45.5043,
"step": 44000
},
{
"epoch": 0.9936908517350158,
"grad_norm": 2161.161865234375,
"learning_rate": 2.4890917267881338e-05,
"loss": 43.3618,
"step": 44100
},
{
"epoch": 0.9959441189725101,
"grad_norm": 791.255859375,
"learning_rate": 2.4781836612540657e-05,
"loss": 46.0761,
"step": 44200
},
{
"epoch": 0.9981973862100045,
"grad_norm": 812.304443359375,
"learning_rate": 2.4672760110716394e-05,
"loss": 45.6475,
"step": 44300
},
{
"epoch": 1.0,
"eval_loss": 112.20081329345703,
"eval_runtime": 881.8427,
"eval_samples_per_second": 22.368,
"eval_steps_per_second": 5.593,
"step": 44380
},
{
"epoch": 1.0004506534474988,
"grad_norm": 418.41058349609375,
"learning_rate": 2.4563689839067913e-05,
"loss": 44.6088,
"step": 44400
},
{
"epoch": 1.0027039206849933,
"grad_norm": 484.3685607910156,
"learning_rate": 2.4454627874135974e-05,
"loss": 41.7446,
"step": 44500
},
{
"epoch": 1.0049571879224877,
"grad_norm": 422.8623352050781,
"learning_rate": 2.4345576292303176e-05,
"loss": 37.9431,
"step": 44600
},
{
"epoch": 1.0072104551599819,
"grad_norm": 612.1026000976562,
"learning_rate": 2.4236537169754437e-05,
"loss": 40.1997,
"step": 44700
},
{
"epoch": 1.0094637223974763,
"grad_norm": 634.6718139648438,
"learning_rate": 2.4127512582437485e-05,
"loss": 38.9043,
"step": 44800
},
{
"epoch": 1.0117169896349707,
"grad_norm": 660.1007080078125,
"learning_rate": 2.4018504606023293e-05,
"loss": 40.3333,
"step": 44900
},
{
"epoch": 1.0139702568724651,
"grad_norm": 513.296142578125,
"learning_rate": 2.3909515315866605e-05,
"loss": 42.9966,
"step": 45000
},
{
"epoch": 1.0162235241099595,
"grad_norm": 605.056640625,
"learning_rate": 2.3800546786966408e-05,
"loss": 39.9933,
"step": 45100
},
{
"epoch": 1.0184767913474537,
"grad_norm": 856.895751953125,
"learning_rate": 2.3691601093926404e-05,
"loss": 37.0164,
"step": 45200
},
{
"epoch": 1.0207300585849481,
"grad_norm": 413.656494140625,
"learning_rate": 2.3582680310915558e-05,
"loss": 38.9117,
"step": 45300
},
{
"epoch": 1.0229833258224426,
"grad_norm": 1099.9735107421875,
"learning_rate": 2.3473786511628575e-05,
"loss": 38.1765,
"step": 45400
},
{
"epoch": 1.025236593059937,
"grad_norm": 899.5122680664062,
"learning_rate": 2.3364921769246423e-05,
"loss": 38.9166,
"step": 45500
},
{
"epoch": 1.0274898602974312,
"grad_norm": 2411.383544921875,
"learning_rate": 2.3256088156396868e-05,
"loss": 38.2463,
"step": 45600
},
{
"epoch": 1.0297431275349256,
"grad_norm": 542.501708984375,
"learning_rate": 2.314728774511502e-05,
"loss": 39.2675,
"step": 45700
},
{
"epoch": 1.03199639477242,
"grad_norm": 566.046142578125,
"learning_rate": 2.303852260680388e-05,
"loss": 39.1687,
"step": 45800
},
{
"epoch": 1.0342496620099144,
"grad_norm": 267.0126953125,
"learning_rate": 2.2929794812194898e-05,
"loss": 41.1642,
"step": 45900
},
{
"epoch": 1.0365029292474088,
"grad_norm": 893.4733276367188,
"learning_rate": 2.2821106431308544e-05,
"loss": 41.2794,
"step": 46000
},
{
"epoch": 1.038756196484903,
"grad_norm": 353.1875305175781,
"learning_rate": 2.2712459533414944e-05,
"loss": 41.7903,
"step": 46100
},
{
"epoch": 1.0410094637223974,
"grad_norm": 749.4720458984375,
"learning_rate": 2.26038561869944e-05,
"loss": 40.0082,
"step": 46200
},
{
"epoch": 1.0432627309598919,
"grad_norm": 488.6062316894531,
"learning_rate": 2.24952984596981e-05,
"loss": 40.3988,
"step": 46300
},
{
"epoch": 1.0455159981973863,
"grad_norm": 1090.5692138671875,
"learning_rate": 2.238678841830867e-05,
"loss": 40.7657,
"step": 46400
},
{
"epoch": 1.0477692654348805,
"grad_norm": 1349.6646728515625,
"learning_rate": 2.2278328128700893e-05,
"loss": 37.8727,
"step": 46500
},
{
"epoch": 1.0500225326723749,
"grad_norm": 1600.57666015625,
"learning_rate": 2.2169919655802335e-05,
"loss": 41.3573,
"step": 46600
},
{
"epoch": 1.0522757999098693,
"grad_norm": 321.0033874511719,
"learning_rate": 2.2061565063554064e-05,
"loss": 38.5797,
"step": 46700
},
{
"epoch": 1.0545290671473637,
"grad_norm": 652.0059204101562,
"learning_rate": 2.195326641487132e-05,
"loss": 42.7963,
"step": 46800
},
{
"epoch": 1.0567823343848581,
"grad_norm": 325.46575927734375,
"learning_rate": 2.184502577160426e-05,
"loss": 34.0251,
"step": 46900
},
{
"epoch": 1.0590356016223523,
"grad_norm": 352.83941650390625,
"learning_rate": 2.173684519449872e-05,
"loss": 38.2834,
"step": 47000
},
{
"epoch": 1.0612888688598467,
"grad_norm": 1244.226318359375,
"learning_rate": 2.1628726743156933e-05,
"loss": 36.7162,
"step": 47100
},
{
"epoch": 1.0635421360973412,
"grad_norm": 1361.024658203125,
"learning_rate": 2.1520672475998373e-05,
"loss": 39.9933,
"step": 47200
},
{
"epoch": 1.0657954033348356,
"grad_norm": 0.0,
"learning_rate": 2.141268445022052e-05,
"loss": 42.092,
"step": 47300
},
{
"epoch": 1.0680486705723298,
"grad_norm": 681.4054565429688,
"learning_rate": 2.1304764721759733e-05,
"loss": 42.4679,
"step": 47400
},
{
"epoch": 1.0703019378098242,
"grad_norm": 683.2991943359375,
"learning_rate": 2.1196915345252084e-05,
"loss": 37.9367,
"step": 47500
},
{
"epoch": 1.0725552050473186,
"grad_norm": 412.36138916015625,
"learning_rate": 2.1089138373994223e-05,
"loss": 36.7721,
"step": 47600
},
{
"epoch": 1.074808472284813,
"grad_norm": 1283.1080322265625,
"learning_rate": 2.0981435859904346e-05,
"loss": 40.7471,
"step": 47700
},
{
"epoch": 1.0770617395223074,
"grad_norm": 885.2091064453125,
"learning_rate": 2.087380985348306e-05,
"loss": 40.407,
"step": 47800
},
{
"epoch": 1.0793150067598016,
"grad_norm": 1542.7694091796875,
"learning_rate": 2.0766262403774386e-05,
"loss": 36.4963,
"step": 47900
},
{
"epoch": 1.081568273997296,
"grad_norm": 827.3414916992188,
"learning_rate": 2.0658795558326743e-05,
"loss": 36.856,
"step": 48000
},
{
"epoch": 1.0838215412347905,
"grad_norm": 0.0,
"learning_rate": 2.055141136315396e-05,
"loss": 41.1057,
"step": 48100
},
{
"epoch": 1.0860748084722849,
"grad_norm": 460.4079284667969,
"learning_rate": 2.0444111862696314e-05,
"loss": 41.1531,
"step": 48200
},
{
"epoch": 1.088328075709779,
"grad_norm": 955.0888061523438,
"learning_rate": 2.0336899099781636e-05,
"loss": 37.0588,
"step": 48300
},
{
"epoch": 1.0905813429472735,
"grad_norm": 565.38720703125,
"learning_rate": 2.022977511558638e-05,
"loss": 43.0631,
"step": 48400
},
{
"epoch": 1.092834610184768,
"grad_norm": 471.39630126953125,
"learning_rate": 2.0122741949596797e-05,
"loss": 40.6631,
"step": 48500
},
{
"epoch": 1.0950878774222623,
"grad_norm": 2771.78759765625,
"learning_rate": 2.0015801639570074e-05,
"loss": 38.7804,
"step": 48600
},
{
"epoch": 1.0973411446597567,
"grad_norm": 921.8468627929688,
"learning_rate": 1.9908956221495567e-05,
"loss": 39.7282,
"step": 48700
},
{
"epoch": 1.099594411897251,
"grad_norm": 722.0336303710938,
"learning_rate": 1.980220772955602e-05,
"loss": 39.0169,
"step": 48800
},
{
"epoch": 1.1018476791347454,
"grad_norm": 5098.97900390625,
"learning_rate": 1.9695558196088846e-05,
"loss": 41.0709,
"step": 48900
},
{
"epoch": 1.1041009463722398,
"grad_norm": 1300.223876953125,
"learning_rate": 1.958900965154743e-05,
"loss": 43.9169,
"step": 49000
},
{
"epoch": 1.1063542136097342,
"grad_norm": 411.2376403808594,
"learning_rate": 1.9482564124462476e-05,
"loss": 38.9952,
"step": 49100
},
{
"epoch": 1.1086074808472284,
"grad_norm": 959.572265625,
"learning_rate": 1.937622364140338e-05,
"loss": 39.8025,
"step": 49200
},
{
"epoch": 1.1108607480847228,
"grad_norm": 594.6739501953125,
"learning_rate": 1.9269990226939652e-05,
"loss": 42.7149,
"step": 49300
},
{
"epoch": 1.1131140153222172,
"grad_norm": 0.0,
"learning_rate": 1.9163865903602374e-05,
"loss": 36.2475,
"step": 49400
},
{
"epoch": 1.1153672825597116,
"grad_norm": 792.9468994140625,
"learning_rate": 1.9057852691845677e-05,
"loss": 36.3523,
"step": 49500
},
{
"epoch": 1.117620549797206,
"grad_norm": 1249.7642822265625,
"learning_rate": 1.895195261000831e-05,
"loss": 35.0879,
"step": 49600
},
{
"epoch": 1.1198738170347002,
"grad_norm": 297.8221740722656,
"learning_rate": 1.8846167674275176e-05,
"loss": 40.7354,
"step": 49700
},
{
"epoch": 1.1221270842721947,
"grad_norm": 841.4304809570312,
"learning_rate": 1.874049989863896e-05,
"loss": 38.4855,
"step": 49800
},
{
"epoch": 1.124380351509689,
"grad_norm": 0.0,
"learning_rate": 1.8634951294861808e-05,
"loss": 40.9361,
"step": 49900
},
{
"epoch": 1.1266336187471835,
"grad_norm": 553.7052612304688,
"learning_rate": 1.852952387243698e-05,
"loss": 43.6521,
"step": 50000
},
{
"epoch": 1.1288868859846777,
"grad_norm": 441.8327331542969,
"learning_rate": 1.842421963855063e-05,
"loss": 34.7172,
"step": 50100
},
{
"epoch": 1.131140153222172,
"grad_norm": 886.4244384765625,
"learning_rate": 1.831904059804358e-05,
"loss": 40.2296,
"step": 50200
},
{
"epoch": 1.1333934204596665,
"grad_norm": 3301.468505859375,
"learning_rate": 1.8213988753373146e-05,
"loss": 36.6934,
"step": 50300
},
{
"epoch": 1.135646687697161,
"grad_norm": 1075.7626953125,
"learning_rate": 1.8109066104575023e-05,
"loss": 39.7705,
"step": 50400
},
{
"epoch": 1.1378999549346553,
"grad_norm": 595.6084594726562,
"learning_rate": 1.80042746492252e-05,
"loss": 38.4987,
"step": 50500
},
{
"epoch": 1.1401532221721495,
"grad_norm": 1291.255859375,
"learning_rate": 1.7899616382401936e-05,
"loss": 42.2248,
"step": 50600
},
{
"epoch": 1.142406489409644,
"grad_norm": 431.224853515625,
"learning_rate": 1.779509329664777e-05,
"loss": 39.2028,
"step": 50700
},
{
"epoch": 1.1446597566471384,
"grad_norm": 484.91259765625,
"learning_rate": 1.7690707381931583e-05,
"loss": 42.3496,
"step": 50800
},
{
"epoch": 1.1469130238846328,
"grad_norm": 987.5538940429688,
"learning_rate": 1.7586460625610728e-05,
"loss": 38.0923,
"step": 50900
},
{
"epoch": 1.149166291122127,
"grad_norm": 456.387451171875,
"learning_rate": 1.7482355012393177e-05,
"loss": 37.8689,
"step": 51000
},
{
"epoch": 1.1514195583596214,
"grad_norm": 534.0150146484375,
"learning_rate": 1.737839252429973e-05,
"loss": 40.1989,
"step": 51100
},
{
"epoch": 1.1536728255971158,
"grad_norm": 931.9837036132812,
"learning_rate": 1.7274575140626318e-05,
"loss": 36.386,
"step": 51200
},
{
"epoch": 1.1559260928346102,
"grad_norm": 689.9551391601562,
"learning_rate": 1.7170904837906265e-05,
"loss": 41.5284,
"step": 51300
},
{
"epoch": 1.1581793600721046,
"grad_norm": 1355.0162353515625,
"learning_rate": 1.7067383589872703e-05,
"loss": 40.0137,
"step": 51400
},
{
"epoch": 1.1604326273095988,
"grad_norm": 403.37762451171875,
"learning_rate": 1.6964013367420966e-05,
"loss": 38.1865,
"step": 51500
},
{
"epoch": 1.1626858945470933,
"grad_norm": 677.8140869140625,
"learning_rate": 1.686079613857109e-05,
"loss": 40.588,
"step": 51600
},
{
"epoch": 1.1649391617845877,
"grad_norm": 815.564697265625,
"learning_rate": 1.6757733868430325e-05,
"loss": 35.5447,
"step": 51700
},
{
"epoch": 1.167192429022082,
"grad_norm": 887.0990600585938,
"learning_rate": 1.665482851915573e-05,
"loss": 40.0702,
"step": 51800
},
{
"epoch": 1.1694456962595763,
"grad_norm": 0.0,
"learning_rate": 1.6552082049916825e-05,
"loss": 37.7994,
"step": 51900
},
{
"epoch": 1.1716989634970707,
"grad_norm": 908.9335327148438,
"learning_rate": 1.6449496416858284e-05,
"loss": 41.0771,
"step": 52000
},
{
"epoch": 1.1739522307345651,
"grad_norm": 554.3827514648438,
"learning_rate": 1.6347073573062672e-05,
"loss": 38.6849,
"step": 52100
},
{
"epoch": 1.1762054979720595,
"grad_norm": 0.0,
"learning_rate": 1.6244815468513315e-05,
"loss": 37.5018,
"step": 52200
},
{
"epoch": 1.178458765209554,
"grad_norm": 500.462158203125,
"learning_rate": 1.6142724050057102e-05,
"loss": 40.3684,
"step": 52300
},
{
"epoch": 1.1807120324470481,
"grad_norm": 477.7984924316406,
"learning_rate": 1.6040801261367493e-05,
"loss": 40.3859,
"step": 52400
},
{
"epoch": 1.1829652996845426,
"grad_norm": 701.5234375,
"learning_rate": 1.5939049042907462e-05,
"loss": 37.1264,
"step": 52500
},
{
"epoch": 1.185218566922037,
"grad_norm": 368.4427185058594,
"learning_rate": 1.583746933189257e-05,
"loss": 43.4277,
"step": 52600
},
{
"epoch": 1.1874718341595314,
"grad_norm": 0.0,
"learning_rate": 1.5736064062254094e-05,
"loss": 37.9021,
"step": 52700
},
{
"epoch": 1.1897251013970256,
"grad_norm": 295.5749816894531,
"learning_rate": 1.56348351646022e-05,
"loss": 37.9324,
"step": 52800
},
{
"epoch": 1.19197836863452,
"grad_norm": 732.7645263671875,
"learning_rate": 1.553378456618918e-05,
"loss": 34.4221,
"step": 52900
},
{
"epoch": 1.1942316358720144,
"grad_norm": 578.3222045898438,
"learning_rate": 1.5432914190872757e-05,
"loss": 38.2748,
"step": 53000
},
{
"epoch": 1.1964849031095088,
"grad_norm": 1566.3675537109375,
"learning_rate": 1.533222595907949e-05,
"loss": 39.7942,
"step": 53100
},
{
"epoch": 1.1987381703470033,
"grad_norm": 1014.3735961914062,
"learning_rate": 1.523172178776816e-05,
"loss": 35.2939,
"step": 53200
},
{
"epoch": 1.2009914375844974,
"grad_norm": 851.2781372070312,
"learning_rate": 1.5131403590393323e-05,
"loss": 33.0635,
"step": 53300
},
{
"epoch": 1.2032447048219919,
"grad_norm": 855.8948364257812,
"learning_rate": 1.5031273276868845e-05,
"loss": 36.5281,
"step": 53400
},
{
"epoch": 1.2054979720594863,
"grad_norm": 746.2517700195312,
"learning_rate": 1.4931332753531574e-05,
"loss": 40.8483,
"step": 53500
},
{
"epoch": 1.2077512392969807,
"grad_norm": 737.263671875,
"learning_rate": 1.4831583923104999e-05,
"loss": 36.7425,
"step": 53600
},
{
"epoch": 1.2100045065344749,
"grad_norm": 460.10894775390625,
"learning_rate": 1.4732028684663074e-05,
"loss": 39.4172,
"step": 53700
},
{
"epoch": 1.2122577737719693,
"grad_norm": 532.8964233398438,
"learning_rate": 1.463266893359403e-05,
"loss": 39.1485,
"step": 53800
},
{
"epoch": 1.2145110410094637,
"grad_norm": 746.1353759765625,
"learning_rate": 1.4533506561564306e-05,
"loss": 39.8182,
"step": 53900
},
{
"epoch": 1.2167643082469581,
"grad_norm": 518.3690185546875,
"learning_rate": 1.443454345648252e-05,
"loss": 36.7215,
"step": 54000
},
{
"epoch": 1.2190175754844526,
"grad_norm": 653.6338500976562,
"learning_rate": 1.4335781502463552e-05,
"loss": 36.7699,
"step": 54100
},
{
"epoch": 1.2212708427219467,
"grad_norm": 592.44921875,
"learning_rate": 1.4237222579792618e-05,
"loss": 37.2595,
"step": 54200
},
{
"epoch": 1.2235241099594412,
"grad_norm": 930.57080078125,
"learning_rate": 1.4138868564889573e-05,
"loss": 36.6723,
"step": 54300
},
{
"epoch": 1.2257773771969356,
"grad_norm": 578.7326049804688,
"learning_rate": 1.4040721330273062e-05,
"loss": 41.5042,
"step": 54400
},
{
"epoch": 1.22803064443443,
"grad_norm": 803.9358520507812,
"learning_rate": 1.3942782744524973e-05,
"loss": 37.0519,
"step": 54500
},
{
"epoch": 1.2302839116719242,
"grad_norm": 1721.2294921875,
"learning_rate": 1.3845054672254781e-05,
"loss": 38.7933,
"step": 54600
},
{
"epoch": 1.2325371789094186,
"grad_norm": 861.869140625,
"learning_rate": 1.3747538974064122e-05,
"loss": 37.8534,
"step": 54700
},
{
"epoch": 1.234790446146913,
"grad_norm": 468.365478515625,
"learning_rate": 1.3650237506511331e-05,
"loss": 35.1038,
"step": 54800
},
{
"epoch": 1.2370437133844074,
"grad_norm": 570.2977905273438,
"learning_rate": 1.3553152122076079e-05,
"loss": 34.9327,
"step": 54900
},
{
"epoch": 1.2392969806219019,
"grad_norm": 1238.723388671875,
"learning_rate": 1.3456284669124158e-05,
"loss": 44.4615,
"step": 55000
},
{
"epoch": 1.241550247859396,
"grad_norm": 559.5399169921875,
"learning_rate": 1.3359636991872215e-05,
"loss": 39.2116,
"step": 55100
},
{
"epoch": 1.2438035150968905,
"grad_norm": 436.1620178222656,
"learning_rate": 1.3263210930352737e-05,
"loss": 36.6266,
"step": 55200
},
{
"epoch": 1.2460567823343849,
"grad_norm": 354.4382629394531,
"learning_rate": 1.3167008320378918e-05,
"loss": 34.6987,
"step": 55300
},
{
"epoch": 1.2483100495718793,
"grad_norm": 1071.3955078125,
"learning_rate": 1.3071030993509788e-05,
"loss": 35.2384,
"step": 55400
},
{
"epoch": 1.2505633168093735,
"grad_norm": 0.0,
"learning_rate": 1.2975280777015314e-05,
"loss": 37.3418,
"step": 55500
},
{
"epoch": 1.252816584046868,
"grad_norm": 1224.337646484375,
"learning_rate": 1.2879759493841575e-05,
"loss": 40.2309,
"step": 55600
},
{
"epoch": 1.2550698512843623,
"grad_norm": 1002.5487670898438,
"learning_rate": 1.2784468962576136e-05,
"loss": 36.8461,
"step": 55700
},
{
"epoch": 1.2573231185218567,
"grad_norm": 743.8724975585938,
"learning_rate": 1.2689410997413325e-05,
"loss": 40.4685,
"step": 55800
},
{
"epoch": 1.2595763857593512,
"grad_norm": 876.872802734375,
"learning_rate": 1.2594587408119804e-05,
"loss": 37.6634,
"step": 55900
},
{
"epoch": 1.2618296529968454,
"grad_norm": 2100.144775390625,
"learning_rate": 1.2500000000000006e-05,
"loss": 38.9799,
"step": 56000
},
{
"epoch": 1.2640829202343398,
"grad_norm": 705.1195678710938,
"learning_rate": 1.2405650573861846e-05,
"loss": 39.5435,
"step": 56100
},
{
"epoch": 1.2663361874718342,
"grad_norm": 803.4048461914062,
"learning_rate": 1.2311540925982403e-05,
"loss": 38.1768,
"step": 56200
},
{
"epoch": 1.2685894547093286,
"grad_norm": 833.8876342773438,
"learning_rate": 1.2217672848073702e-05,
"loss": 39.7547,
"step": 56300
},
{
"epoch": 1.2708427219468228,
"grad_norm": 1292.3961181640625,
"learning_rate": 1.2124048127248644e-05,
"loss": 40.0696,
"step": 56400
},
{
"epoch": 1.2730959891843172,
"grad_norm": 686.7381591796875,
"learning_rate": 1.2030668545986959e-05,
"loss": 37.1013,
"step": 56500
},
{
"epoch": 1.2753492564218116,
"grad_norm": 899.0304565429688,
"learning_rate": 1.1937535882101281e-05,
"loss": 36.339,
"step": 56600
},
{
"epoch": 1.277602523659306,
"grad_norm": 632.4103393554688,
"learning_rate": 1.1844651908703261e-05,
"loss": 39.9224,
"step": 56700
},
{
"epoch": 1.2798557908968005,
"grad_norm": 379.15496826171875,
"learning_rate": 1.175201839416988e-05,
"loss": 36.499,
"step": 56800
},
{
"epoch": 1.2821090581342947,
"grad_norm": 382.707275390625,
"learning_rate": 1.1659637102109714e-05,
"loss": 36.4978,
"step": 56900
},
{
"epoch": 1.284362325371789,
"grad_norm": 616.2396240234375,
"learning_rate": 1.1567509791329401e-05,
"loss": 36.3641,
"step": 57000
},
{
"epoch": 1.2866155926092835,
"grad_norm": 2957.53955078125,
"learning_rate": 1.1475638215800156e-05,
"loss": 35.4105,
"step": 57100
},
{
"epoch": 1.288868859846778,
"grad_norm": 676.7122192382812,
"learning_rate": 1.1384024124624324e-05,
"loss": 35.4272,
"step": 57200
},
{
"epoch": 1.291122127084272,
"grad_norm": 318.8774719238281,
"learning_rate": 1.1292669262002159e-05,
"loss": 34.4229,
"step": 57300
},
{
"epoch": 1.2933753943217665,
"grad_norm": 0.0,
"learning_rate": 1.1201575367198547e-05,
"loss": 37.1322,
"step": 57400
},
{
"epoch": 1.295628661559261,
"grad_norm": 397.2348937988281,
"learning_rate": 1.1110744174509952e-05,
"loss": 38.137,
"step": 57500
},
{
"epoch": 1.2978819287967553,
"grad_norm": 580.2010498046875,
"learning_rate": 1.1020177413231334e-05,
"loss": 37.5998,
"step": 57600
},
{
"epoch": 1.3001351960342498,
"grad_norm": 0.0,
"learning_rate": 1.0929876807623285e-05,
"loss": 34.9743,
"step": 57700
},
{
"epoch": 1.302388463271744,
"grad_norm": 1732.2982177734375,
"learning_rate": 1.0839844076879185e-05,
"loss": 37.4842,
"step": 57800
},
{
"epoch": 1.3046417305092384,
"grad_norm": 526.9512329101562,
"learning_rate": 1.0750080935092425e-05,
"loss": 35.1266,
"step": 57900
},
{
"epoch": 1.3068949977467328,
"grad_norm": 893.0797729492188,
"learning_rate": 1.0660589091223855e-05,
"loss": 35.0203,
"step": 58000
},
{
"epoch": 1.3091482649842272,
"grad_norm": 307.86212158203125,
"learning_rate": 1.0571370249069162e-05,
"loss": 41.4471,
"step": 58100
},
{
"epoch": 1.3114015322217214,
"grad_norm": 1008.5663452148438,
"learning_rate": 1.0482426107226507e-05,
"loss": 33.4428,
"step": 58200
},
{
"epoch": 1.3136547994592158,
"grad_norm": 1568.9752197265625,
"learning_rate": 1.0393758359064146e-05,
"loss": 38.1394,
"step": 58300
},
{
"epoch": 1.3159080666967102,
"grad_norm": 754.859130859375,
"learning_rate": 1.0305368692688174e-05,
"loss": 35.5464,
"step": 58400
},
{
"epoch": 1.3181613339342046,
"grad_norm": 412.3022766113281,
"learning_rate": 1.0217258790910448e-05,
"loss": 36.5812,
"step": 58500
},
{
"epoch": 1.320414601171699,
"grad_norm": 726.6887817382812,
"learning_rate": 1.0129430331216471e-05,
"loss": 36.4859,
"step": 58600
},
{
"epoch": 1.3226678684091933,
"grad_norm": 463.0475769042969,
"learning_rate": 1.0041884985733524e-05,
"loss": 38.6337,
"step": 58700
},
{
"epoch": 1.3249211356466877,
"grad_norm": 782.8141479492188,
"learning_rate": 9.954624421198792e-06,
"loss": 40.0479,
"step": 58800
},
{
"epoch": 1.327174402884182,
"grad_norm": 1247.14208984375,
"learning_rate": 9.867650298927645e-06,
"loss": 34.5978,
"step": 58900
},
{
"epoch": 1.3294276701216765,
"grad_norm": 1039.3582763671875,
"learning_rate": 9.780964274781984e-06,
"loss": 36.1797,
"step": 59000
},
{
"epoch": 1.3316809373591707,
"grad_norm": 439.5457763671875,
"learning_rate": 9.694567999138765e-06,
"loss": 36.7625,
"step": 59100
},
{
"epoch": 1.3339342045966651,
"grad_norm": 960.1097412109375,
"learning_rate": 9.608463116858542e-06,
"loss": 35.3218,
"step": 59200
},
{
"epoch": 1.3361874718341595,
"grad_norm": 981.8735961914062,
"learning_rate": 9.522651267254149e-06,
"loss": 38.6512,
"step": 59300
},
{
"epoch": 1.338440739071654,
"grad_norm": 382.9591369628906,
"learning_rate": 9.437134084059515e-06,
"loss": 33.843,
"step": 59400
},
{
"epoch": 1.3406940063091484,
"grad_norm": 454.0657043457031,
"learning_rate": 9.351913195398524e-06,
"loss": 36.7899,
"step": 59500
},
{
"epoch": 1.3429472735466426,
"grad_norm": 381.1954040527344,
"learning_rate": 9.266990223754069e-06,
"loss": 37.5628,
"step": 59600
},
{
"epoch": 1.345200540784137,
"grad_norm": 682.6860961914062,
"learning_rate": 9.1823667859371e-06,
"loss": 37.8172,
"step": 59700
},
{
"epoch": 1.3474538080216314,
"grad_norm": 788.8383178710938,
"learning_rate": 9.098044493055899e-06,
"loss": 35.3697,
"step": 59800
},
{
"epoch": 1.3497070752591258,
"grad_norm": 854.1769409179688,
"learning_rate": 9.014024950485383e-06,
"loss": 33.7009,
"step": 59900
},
{
"epoch": 1.35196034249662,
"grad_norm": 980.5393676757812,
"learning_rate": 8.930309757836517e-06,
"loss": 36.0638,
"step": 60000
},
{
"epoch": 1.3542136097341144,
"grad_norm": 560.2689208984375,
"learning_rate": 8.84690050892591e-06,
"loss": 38.2246,
"step": 60100
},
{
"epoch": 1.3564668769716088,
"grad_norm": 987.0377197265625,
"learning_rate": 8.763798791745411e-06,
"loss": 36.7167,
"step": 60200
},
{
"epoch": 1.3587201442091033,
"grad_norm": 877.1320190429688,
"learning_rate": 8.681006188431946e-06,
"loss": 31.5632,
"step": 60300
},
{
"epoch": 1.3609734114465977,
"grad_norm": 1143.940185546875,
"learning_rate": 8.598524275237322e-06,
"loss": 37.9032,
"step": 60400
},
{
"epoch": 1.3632266786840919,
"grad_norm": 394.9543762207031,
"learning_rate": 8.51635462249828e-06,
"loss": 41.1612,
"step": 60500
},
{
"epoch": 1.3654799459215863,
"grad_norm": 933.392822265625,
"learning_rate": 8.434498794606568e-06,
"loss": 36.5777,
"step": 60600
},
{
"epoch": 1.3677332131590807,
"grad_norm": 958.4544067382812,
"learning_rate": 8.352958349979145e-06,
"loss": 34.62,
"step": 60700
},
{
"epoch": 1.3699864803965751,
"grad_norm": 1324.6890869140625,
"learning_rate": 8.271734841028553e-06,
"loss": 36.4317,
"step": 60800
},
{
"epoch": 1.3722397476340693,
"grad_norm": 562.975341796875,
"learning_rate": 8.190829814133294e-06,
"loss": 37.9488,
"step": 60900
},
{
"epoch": 1.3744930148715637,
"grad_norm": 407.0682067871094,
"learning_rate": 8.110244809608495e-06,
"loss": 35.8329,
"step": 61000
},
{
"epoch": 1.3767462821090581,
"grad_norm": 452.447998046875,
"learning_rate": 8.029981361676456e-06,
"loss": 36.064,
"step": 61100
},
{
"epoch": 1.3789995493465526,
"grad_norm": 673.4539794921875,
"learning_rate": 7.950040998437542e-06,
"loss": 35.4829,
"step": 61200
},
{
"epoch": 1.381252816584047,
"grad_norm": 802.2340698242188,
"learning_rate": 7.87042524184102e-06,
"loss": 38.9247,
"step": 61300
},
{
"epoch": 1.3835060838215412,
"grad_norm": 276.5713806152344,
"learning_rate": 7.791135607656147e-06,
"loss": 36.7328,
"step": 61400
},
{
"epoch": 1.3857593510590356,
"grad_norm": 359.2288818359375,
"learning_rate": 7.712173605443269e-06,
"loss": 35.8955,
"step": 61500
},
{
"epoch": 1.38801261829653,
"grad_norm": 590.3004150390625,
"learning_rate": 7.633540738525066e-06,
"loss": 36.7346,
"step": 61600
},
{
"epoch": 1.3902658855340244,
"grad_norm": 2431.375732421875,
"learning_rate": 7.555238503958001e-06,
"loss": 38.1598,
"step": 61700
},
{
"epoch": 1.3925191527715186,
"grad_norm": 847.335205078125,
"learning_rate": 7.477268392503728e-06,
"loss": 35.9027,
"step": 61800
},
{
"epoch": 1.394772420009013,
"grad_norm": 543.1801147460938,
"learning_rate": 7.399631888600797e-06,
"loss": 37.6817,
"step": 61900
},
{
"epoch": 1.3970256872465074,
"grad_norm": 415.84918212890625,
"learning_rate": 7.3223304703363135e-06,
"loss": 34.3274,
"step": 62000
},
{
"epoch": 1.3992789544840019,
"grad_norm": 777.2684936523438,
"learning_rate": 7.245365609417864e-06,
"loss": 39.1105,
"step": 62100
},
{
"epoch": 1.4015322217214963,
"grad_norm": 1126.7650146484375,
"learning_rate": 7.168738771145464e-06,
"loss": 37.2822,
"step": 62200
},
{
"epoch": 1.4037854889589905,
"grad_norm": 352.9339904785156,
"learning_rate": 7.092451414383644e-06,
"loss": 39.5989,
"step": 62300
},
{
"epoch": 1.4060387561964849,
"grad_norm": 790.8270874023438,
"learning_rate": 7.016504991533726e-06,
"loss": 37.4908,
"step": 62400
},
{
"epoch": 1.4082920234339793,
"grad_norm": 1335.3138427734375,
"learning_rate": 6.940900948506113e-06,
"loss": 42.1853,
"step": 62500
},
{
"epoch": 1.4105452906714737,
"grad_norm": 0.0,
"learning_rate": 6.865640724692815e-06,
"loss": 37.1607,
"step": 62600
},
{
"epoch": 1.412798557908968,
"grad_norm": 466.6181945800781,
"learning_rate": 6.790725752939997e-06,
"loss": 37.1732,
"step": 62700
},
{
"epoch": 1.4150518251464623,
"grad_norm": 317.6047668457031,
"learning_rate": 6.716157459520739e-06,
"loss": 39.2635,
"step": 62800
},
{
"epoch": 1.4173050923839567,
"grad_norm": 553.5538330078125,
"learning_rate": 6.641937264107867e-06,
"loss": 37.3103,
"step": 62900
},
{
"epoch": 1.4195583596214512,
"grad_norm": 282.9200439453125,
"learning_rate": 6.568066579746901e-06,
"loss": 38.9337,
"step": 63000
},
{
"epoch": 1.4218116268589456,
"grad_norm": 1359.1005859375,
"learning_rate": 6.494546812829206e-06,
"loss": 41.63,
"step": 63100
},
{
"epoch": 1.4240648940964398,
"grad_norm": 809.4750366210938,
"learning_rate": 6.421379363065142e-06,
"loss": 33.3718,
"step": 63200
},
{
"epoch": 1.4263181613339342,
"grad_norm": 259.95654296875,
"learning_rate": 6.348565623457514e-06,
"loss": 39.1779,
"step": 63300
},
{
"epoch": 1.4285714285714286,
"grad_norm": 1262.052734375,
"learning_rate": 6.2761069802749455e-06,
"loss": 33.9572,
"step": 63400
},
{
"epoch": 1.430824695808923,
"grad_norm": 234.85487365722656,
"learning_rate": 6.204004813025568e-06,
"loss": 40.2383,
"step": 63500
},
{
"epoch": 1.4330779630464172,
"grad_norm": 342.09375,
"learning_rate": 6.1322604944307e-06,
"loss": 40.2604,
"step": 63600
},
{
"epoch": 1.4353312302839116,
"grad_norm": 1251.5074462890625,
"learning_rate": 6.060875390398757e-06,
"loss": 38.2612,
"step": 63700
},
{
"epoch": 1.437584497521406,
"grad_norm": 1840.7529296875,
"learning_rate": 5.989850859999227e-06,
"loss": 35.3611,
"step": 63800
},
{
"epoch": 1.4398377647589005,
"grad_norm": 1598.6044921875,
"learning_rate": 5.919188255436778e-06,
"loss": 35.9621,
"step": 63900
},
{
"epoch": 1.4420910319963949,
"grad_norm": 492.7054443359375,
"learning_rate": 5.848888922025553e-06,
"loss": 38.0005,
"step": 64000
},
{
"epoch": 1.444344299233889,
"grad_norm": 935.9298095703125,
"learning_rate": 5.778954198163514e-06,
"loss": 37.7181,
"step": 64100
},
{
"epoch": 1.4465975664713835,
"grad_norm": 583.2149658203125,
"learning_rate": 5.709385415307006e-06,
"loss": 38.0206,
"step": 64200
},
{
"epoch": 1.448850833708878,
"grad_norm": 1340.3944091796875,
"learning_rate": 5.640183897945362e-06,
"loss": 36.6506,
"step": 64300
},
{
"epoch": 1.4511041009463723,
"grad_norm": 358.40875244140625,
"learning_rate": 5.571350963575728e-06,
"loss": 36.0538,
"step": 64400
},
{
"epoch": 1.4533573681838665,
"grad_norm": 1554.845703125,
"learning_rate": 5.50288792267796e-06,
"loss": 34.25,
"step": 64500
},
{
"epoch": 1.455610635421361,
"grad_norm": 1170.0960693359375,
"learning_rate": 5.434796078689652e-06,
"loss": 32.1239,
"step": 64600
},
{
"epoch": 1.4578639026588553,
"grad_norm": 0.0,
"learning_rate": 5.367076727981382e-06,
"loss": 40.6604,
"step": 64700
},
{
"epoch": 1.4601171698963498,
"grad_norm": 417.8546142578125,
"learning_rate": 5.299731159831953e-06,
"loss": 37.095,
"step": 64800
},
{
"epoch": 1.4623704371338442,
"grad_norm": 377.0328369140625,
"learning_rate": 5.2327606564039234e-06,
"loss": 35.4373,
"step": 64900
},
{
"epoch": 1.4646237043713384,
"grad_norm": 625.896240234375,
"learning_rate": 5.166166492719124e-06,
"loss": 34.6625,
"step": 65000
},
{
"epoch": 1.4668769716088328,
"grad_norm": 1136.8114013671875,
"learning_rate": 5.099949936634451e-06,
"loss": 35.121,
"step": 65100
},
{
"epoch": 1.4691302388463272,
"grad_norm": 490.1904296875,
"learning_rate": 5.034112248817685e-06,
"loss": 35.5372,
"step": 65200
},
{
"epoch": 1.4713835060838216,
"grad_norm": 299.876953125,
"learning_rate": 4.9686546827234865e-06,
"loss": 36.2584,
"step": 65300
},
{
"epoch": 1.4736367733213158,
"grad_norm": 672.1801147460938,
"learning_rate": 4.903578484569568e-06,
"loss": 38.8503,
"step": 65400
},
{
"epoch": 1.4758900405588102,
"grad_norm": 506.11322021484375,
"learning_rate": 4.8388848933129335e-06,
"loss": 35.2598,
"step": 65500
},
{
"epoch": 1.4781433077963047,
"grad_norm": 347.1400146484375,
"learning_rate": 4.7745751406263165e-06,
"loss": 37.9243,
"step": 65600
},
{
"epoch": 1.480396575033799,
"grad_norm": 741.727783203125,
"learning_rate": 4.710650450874693e-06,
"loss": 36.6068,
"step": 65700
},
{
"epoch": 1.4826498422712935,
"grad_norm": 490.6405944824219,
"learning_rate": 4.647112041092022e-06,
"loss": 37.8632,
"step": 65800
},
{
"epoch": 1.4849031095087877,
"grad_norm": 148.07147216796875,
"learning_rate": 4.583961120958027e-06,
"loss": 39.7858,
"step": 65900
},
{
"epoch": 1.487156376746282,
"grad_norm": 1348.078857421875,
"learning_rate": 4.521198892775203e-06,
"loss": 42.7009,
"step": 66000
},
{
"epoch": 1.4894096439837765,
"grad_norm": 207.09417724609375,
"learning_rate": 4.45882655144591e-06,
"loss": 35.0663,
"step": 66100
},
{
"epoch": 1.4916629112212707,
"grad_norm": 810.766845703125,
"learning_rate": 4.396845284449608e-06,
"loss": 36.5057,
"step": 66200
},
{
"epoch": 1.4939161784587651,
"grad_norm": 2021.21728515625,
"learning_rate": 4.335256271820287e-06,
"loss": 36.8715,
"step": 66300
},
{
"epoch": 1.4961694456962595,
"grad_norm": 0.0,
"learning_rate": 4.274060686123959e-06,
"loss": 35.6832,
"step": 66400
},
{
"epoch": 1.498422712933754,
"grad_norm": 1135.528076171875,
"learning_rate": 4.213259692436367e-06,
"loss": 33.5205,
"step": 66500
},
{
"epoch": 1.5006759801712484,
"grad_norm": 950.0307006835938,
"learning_rate": 4.152854448320797e-06,
"loss": 37.6403,
"step": 66600
},
{
"epoch": 1.5029292474087428,
"grad_norm": 1010.8804321289062,
"learning_rate": 4.092846103806011e-06,
"loss": 35.893,
"step": 66700
},
{
"epoch": 1.5051825146462372,
"grad_norm": 2760.01611328125,
"learning_rate": 4.0332358013644016e-06,
"loss": 40.6062,
"step": 66800
},
{
"epoch": 1.5074357818837314,
"grad_norm": 843.011962890625,
"learning_rate": 3.9740246758901895e-06,
"loss": 37.1098,
"step": 66900
},
{
"epoch": 1.5096890491212258,
"grad_norm": 1276.917236328125,
"learning_rate": 3.9152138546778625e-06,
"loss": 32.2139,
"step": 67000
},
{
"epoch": 1.51194231635872,
"grad_norm": 1487.1099853515625,
"learning_rate": 3.85680445740067e-06,
"loss": 37.2284,
"step": 67100
},
{
"epoch": 1.5141955835962144,
"grad_norm": 953.3679809570312,
"learning_rate": 3.798797596089351e-06,
"loss": 39.7566,
"step": 67200
},
{
"epoch": 1.5164488508337088,
"grad_norm": 858.4658813476562,
"learning_rate": 3.741194375110932e-06,
"loss": 35.92,
"step": 67300
},
{
"epoch": 1.5187021180712033,
"grad_norm": 309.0785217285156,
"learning_rate": 3.6839958911476957e-06,
"loss": 38.8753,
"step": 67400
},
{
"epoch": 1.5209553853086977,
"grad_norm": 1036.7149658203125,
"learning_rate": 3.6272032331763408e-06,
"loss": 38.0908,
"step": 67500
},
{
"epoch": 1.523208652546192,
"grad_norm": 620.3539428710938,
"learning_rate": 3.5708174824471947e-06,
"loss": 34.9211,
"step": 67600
},
{
"epoch": 1.5254619197836865,
"grad_norm": 447.8617248535156,
"learning_rate": 3.5148397124636826e-06,
"loss": 39.3934,
"step": 67700
},
{
"epoch": 1.5277151870211807,
"grad_norm": 517.4603881835938,
"learning_rate": 3.4592709889618545e-06,
"loss": 36.0529,
"step": 67800
},
{
"epoch": 1.5299684542586751,
"grad_norm": 872.0263671875,
"learning_rate": 3.4041123698901084e-06,
"loss": 34.7142,
"step": 67900
},
{
"epoch": 1.5322217214961693,
"grad_norm": 596.3970947265625,
"learning_rate": 3.3493649053890326e-06,
"loss": 35.7701,
"step": 68000
},
{
"epoch": 1.5344749887336637,
"grad_norm": 1187.1817626953125,
"learning_rate": 3.295029637771441e-06,
"loss": 34.7188,
"step": 68100
},
{
"epoch": 1.5367282559711581,
"grad_norm": 2972.13916015625,
"learning_rate": 3.2411076015025075e-06,
"loss": 39.0094,
"step": 68200
},
{
"epoch": 1.5389815232086526,
"grad_norm": 484.574951171875,
"learning_rate": 3.187599823180071e-06,
"loss": 36.4442,
"step": 68300
},
{
"epoch": 1.541234790446147,
"grad_norm": 512.0244140625,
"learning_rate": 3.1345073215151066e-06,
"loss": 35.6959,
"step": 68400
},
{
"epoch": 1.5434880576836414,
"grad_norm": 1594.818603515625,
"learning_rate": 3.081831107312308e-06,
"loss": 33.7189,
"step": 68500
},
{
"epoch": 1.5457413249211358,
"grad_norm": 412.11993408203125,
"learning_rate": 3.029572183450868e-06,
"loss": 39.2777,
"step": 68600
},
{
"epoch": 1.54799459215863,
"grad_norm": 261.7032165527344,
"learning_rate": 2.9777315448653614e-06,
"loss": 37.5702,
"step": 68700
},
{
"epoch": 1.5502478593961244,
"grad_norm": 1331.4417724609375,
"learning_rate": 2.9263101785268254e-06,
"loss": 38.3099,
"step": 68800
},
{
"epoch": 1.5525011266336186,
"grad_norm": 1108.656005859375,
"learning_rate": 2.875309063423956e-06,
"loss": 37.6969,
"step": 68900
},
{
"epoch": 1.554754393871113,
"grad_norm": 629.5645751953125,
"learning_rate": 2.8247291705444575e-06,
"loss": 34.4015,
"step": 69000
},
{
"epoch": 1.5570076611086074,
"grad_norm": 339.4286193847656,
"learning_rate": 2.7745714628565927e-06,
"loss": 34.8371,
"step": 69100
},
{
"epoch": 1.5592609283461019,
"grad_norm": 497.2080383300781,
"learning_rate": 2.7248368952908053e-06,
"loss": 34.618,
"step": 69200
},
{
"epoch": 1.5615141955835963,
"grad_norm": 447.3420104980469,
"learning_rate": 2.6755264147215797e-06,
"loss": 34.2607,
"step": 69300
},
{
"epoch": 1.5637674628210907,
"grad_norm": 507.5426330566406,
"learning_rate": 2.6266409599493753e-06,
"loss": 40.3609,
"step": 69400
},
{
"epoch": 1.566020730058585,
"grad_norm": 504.575927734375,
"learning_rate": 2.578181461682794e-06,
"loss": 35.5052,
"step": 69500
},
{
"epoch": 1.5682739972960793,
"grad_norm": 614.0762329101562,
"learning_rate": 2.5301488425208296e-06,
"loss": 35.4192,
"step": 69600
},
{
"epoch": 1.5705272645335737,
"grad_norm": 1427.1513671875,
"learning_rate": 2.482544016935304e-06,
"loss": 33.3038,
"step": 69700
},
{
"epoch": 1.572780531771068,
"grad_norm": 1021.6607666015625,
"learning_rate": 2.43536789125349e-06,
"loss": 33.7017,
"step": 69800
},
{
"epoch": 1.5750337990085623,
"grad_norm": 1753.479736328125,
"learning_rate": 2.3886213636407973e-06,
"loss": 36.5257,
"step": 69900
},
{
"epoch": 1.5772870662460567,
"grad_norm": 1146.5517578125,
"learning_rate": 2.3423053240837515e-06,
"loss": 36.1648,
"step": 70000
},
{
"epoch": 1.5795403334835512,
"grad_norm": 586.0597534179688,
"learning_rate": 2.296420654372966e-06,
"loss": 38.7891,
"step": 70100
},
{
"epoch": 1.5817936007210456,
"grad_norm": 637.76904296875,
"learning_rate": 2.2509682280864224e-06,
"loss": 33.7852,
"step": 70200
},
{
"epoch": 1.58404686795854,
"grad_norm": 0.0,
"learning_rate": 2.205948910572786e-06,
"loss": 34.2423,
"step": 70300
},
{
"epoch": 1.5863001351960344,
"grad_norm": 583.9962158203125,
"learning_rate": 2.1613635589349756e-06,
"loss": 35.2592,
"step": 70400
},
{
"epoch": 1.5885534024335286,
"grad_norm": 372.9781799316406,
"learning_rate": 2.1172130220138226e-06,
"loss": 36.0234,
"step": 70500
},
{
"epoch": 1.590806669671023,
"grad_norm": 689.7586669921875,
"learning_rate": 2.073498140371899e-06,
"loss": 38.4326,
"step": 70600
},
{
"epoch": 1.5930599369085172,
"grad_norm": 1217.2618408203125,
"learning_rate": 2.030219746277545e-06,
"loss": 35.6317,
"step": 70700
},
{
"epoch": 1.5953132041460116,
"grad_norm": 1366.7015380859375,
"learning_rate": 1.9873786636889906e-06,
"loss": 32.8517,
"step": 70800
},
{
"epoch": 1.597566471383506,
"grad_norm": 463.5328063964844,
"learning_rate": 1.9449757082387083e-06,
"loss": 36.9324,
"step": 70900
},
{
"epoch": 1.5998197386210005,
"grad_norm": 873.59130859375,
"learning_rate": 1.9030116872178316e-06,
"loss": 35.7242,
"step": 71000
},
{
"epoch": 1.6020730058584949,
"grad_norm": 533.456298828125,
"learning_rate": 1.8614873995608406e-06,
"loss": 34.3465,
"step": 71100
},
{
"epoch": 1.6043262730959893,
"grad_norm": 1079.99853515625,
"learning_rate": 1.8204036358303173e-06,
"loss": 35.8141,
"step": 71200
},
{
"epoch": 1.6065795403334837,
"grad_norm": 788.5497436523438,
"learning_rate": 1.7797611782018942e-06,
"loss": 34.5627,
"step": 71300
},
{
"epoch": 1.608832807570978,
"grad_norm": 975.3521728515625,
"learning_rate": 1.7395608004493886e-06,
"loss": 34.3037,
"step": 71400
},
{
"epoch": 1.6110860748084723,
"grad_norm": 0.0,
"learning_rate": 1.6998032679300391e-06,
"loss": 33.9326,
"step": 71500
},
{
"epoch": 1.6133393420459665,
"grad_norm": 1441.5693359375,
"learning_rate": 1.6604893375699594e-06,
"loss": 34.2679,
"step": 71600
},
{
"epoch": 1.615592609283461,
"grad_norm": 1534.6036376953125,
"learning_rate": 1.62161975784971e-06,
"loss": 31.3119,
"step": 71700
},
{
"epoch": 1.6178458765209554,
"grad_norm": 1183.213623046875,
"learning_rate": 1.5831952687900608e-06,
"loss": 37.1831,
"step": 71800
},
{
"epoch": 1.6200991437584498,
"grad_norm": 1218.68505859375,
"learning_rate": 1.5452166019378989e-06,
"loss": 35.6401,
"step": 71900
},
{
"epoch": 1.6223524109959442,
"grad_norm": 1537.08056640625,
"learning_rate": 1.5076844803522922e-06,
"loss": 36.5323,
"step": 72000
},
{
"epoch": 1.6246056782334386,
"grad_norm": 1524.171875,
"learning_rate": 1.4705996185907373e-06,
"loss": 35.4555,
"step": 72100
},
{
"epoch": 1.626858945470933,
"grad_norm": 805.3032836914062,
"learning_rate": 1.4339627226955392e-06,
"loss": 36.3888,
"step": 72200
},
{
"epoch": 1.6291122127084272,
"grad_norm": 0.0,
"learning_rate": 1.3977744901803951e-06,
"loss": 36.6125,
"step": 72300
},
{
"epoch": 1.6313654799459216,
"grad_norm": 764.4529418945312,
"learning_rate": 1.362035610017079e-06,
"loss": 36.2714,
"step": 72400
},
{
"epoch": 1.6336187471834158,
"grad_norm": 208.58338928222656,
"learning_rate": 1.3267467626223606e-06,
"loss": 34.2652,
"step": 72500
},
{
"epoch": 1.6358720144209102,
"grad_norm": 1677.5860595703125,
"learning_rate": 1.291908619845017e-06,
"loss": 35.1815,
"step": 72600
},
{
"epoch": 1.6381252816584047,
"grad_norm": 431.1494140625,
"learning_rate": 1.2575218449530746e-06,
"loss": 33.5625,
"step": 72700
},
{
"epoch": 1.640378548895899,
"grad_norm": 833.9300537109375,
"learning_rate": 1.2235870926211619e-06,
"loss": 36.9845,
"step": 72800
},
{
"epoch": 1.6426318161333935,
"grad_norm": 671.1135864257812,
"learning_rate": 1.190105008918041e-06,
"loss": 35.8609,
"step": 72900
},
{
"epoch": 1.644885083370888,
"grad_norm": 565.2303466796875,
"learning_rate": 1.1570762312943295e-06,
"loss": 32.2597,
"step": 73000
},
{
"epoch": 1.6471383506083823,
"grad_norm": 701.4552001953125,
"learning_rate": 1.1245013885703343e-06,
"loss": 36.2914,
"step": 73100
},
{
"epoch": 1.6493916178458765,
"grad_norm": 748.3810424804688,
"learning_rate": 1.0923811009241142e-06,
"loss": 32.0475,
"step": 73200
},
{
"epoch": 1.651644885083371,
"grad_norm": 581.8877563476562,
"learning_rate": 1.0607159798796396e-06,
"loss": 34.1596,
"step": 73300
},
{
"epoch": 1.6538981523208651,
"grad_norm": 377.5722961425781,
"learning_rate": 1.0295066282951738e-06,
"loss": 38.7637,
"step": 73400
},
{
"epoch": 1.6561514195583595,
"grad_norm": 905.6104736328125,
"learning_rate": 9.98753640351785e-07,
"loss": 39.1345,
"step": 73500
},
{
"epoch": 1.658404686795854,
"grad_norm": 491.26531982421875,
"learning_rate": 9.684576015420278e-07,
"loss": 34.3473,
"step": 73600
},
{
"epoch": 1.6606579540333484,
"grad_norm": 950.9983520507812,
"learning_rate": 9.386190886588208e-07,
"loss": 37.4909,
"step": 73700
},
{
"epoch": 1.6629112212708428,
"grad_norm": 1486.637451171875,
"learning_rate": 9.092386697844263e-07,
"loss": 32.4935,
"step": 73800
},
{
"epoch": 1.6651644885083372,
"grad_norm": 797.1807861328125,
"learning_rate": 8.803169042796766e-07,
"loss": 39.2807,
"step": 73900
},
{
"epoch": 1.6674177557458316,
"grad_norm": 1035.8782958984375,
"learning_rate": 8.51854342773295e-07,
"loss": 35.8686,
"step": 74000
},
{
"epoch": 1.6696710229833258,
"grad_norm": 850.1578979492188,
"learning_rate": 8.23851527151423e-07,
"loss": 33.6619,
"step": 74100
},
{
"epoch": 1.6719242902208202,
"grad_norm": 570.3875732421875,
"learning_rate": 7.963089905473092e-07,
"loss": 34.901,
"step": 74200
},
{
"epoch": 1.6741775574583144,
"grad_norm": 911.00341796875,
"learning_rate": 7.692272573311426e-07,
"loss": 37.3474,
"step": 74300
},
{
"epoch": 1.6764308246958088,
"grad_norm": 1152.5958251953125,
"learning_rate": 7.426068431000882e-07,
"loss": 32.3363,
"step": 74400
},
{
"epoch": 1.6786840919333033,
"grad_norm": 1237.0150146484375,
"learning_rate": 7.164482546684642e-07,
"loss": 35.8239,
"step": 74500
},
{
"epoch": 1.6809373591707977,
"grad_norm": 508.03125,
"learning_rate": 6.907519900580861e-07,
"loss": 36.2687,
"step": 74600
},
{
"epoch": 1.683190626408292,
"grad_norm": 325.2568359375,
"learning_rate": 6.65518538488788e-07,
"loss": 33.2241,
"step": 74700
},
{
"epoch": 1.6854438936457865,
"grad_norm": 393.15087890625,
"learning_rate": 6.407483803691216e-07,
"loss": 36.265,
"step": 74800
},
{
"epoch": 1.687697160883281,
"grad_norm": 647.8585205078125,
"learning_rate": 6.164419872871835e-07,
"loss": 32.4864,
"step": 74900
},
{
"epoch": 1.6899504281207751,
"grad_norm": 1219.8280029296875,
"learning_rate": 5.925998220016659e-07,
"loss": 37.0409,
"step": 75000
},
{
"epoch": 1.6922036953582695,
"grad_norm": 969.5092163085938,
"learning_rate": 5.692223384330287e-07,
"loss": 36.2839,
"step": 75100
},
{
"epoch": 1.6944569625957637,
"grad_norm": 1250.261962890625,
"learning_rate": 5.463099816548579e-07,
"loss": 34.0554,
"step": 75200
},
{
"epoch": 1.6967102298332581,
"grad_norm": 867.454833984375,
"learning_rate": 5.238631878854039e-07,
"loss": 37.9732,
"step": 75300
},
{
"epoch": 1.6989634970707526,
"grad_norm": 418.4448547363281,
"learning_rate": 5.018823844792603e-07,
"loss": 37.3185,
"step": 75400
},
{
"epoch": 1.701216764308247,
"grad_norm": 1724.4500732421875,
"learning_rate": 4.803679899192392e-07,
"loss": 35.9752,
"step": 75500
},
{
"epoch": 1.7034700315457414,
"grad_norm": 621.5121459960938,
"learning_rate": 4.5932041380840065e-07,
"loss": 36.9349,
"step": 75600
},
{
"epoch": 1.7057232987832358,
"grad_norm": 843.6145629882812,
"learning_rate": 4.3874005686225796e-07,
"loss": 36.2777,
"step": 75700
},
{
"epoch": 1.7079765660207302,
"grad_norm": 800.4345703125,
"learning_rate": 4.1862731090113736e-07,
"loss": 37.3644,
"step": 75800
},
{
"epoch": 1.7102298332582244,
"grad_norm": 1632.2015380859375,
"learning_rate": 3.9898255884272817e-07,
"loss": 34.1544,
"step": 75900
},
{
"epoch": 1.7124831004957188,
"grad_norm": 1353.8416748046875,
"learning_rate": 3.7980617469479953e-07,
"loss": 37.3623,
"step": 76000
},
{
"epoch": 1.714736367733213,
"grad_norm": 423.76629638671875,
"learning_rate": 3.6109852354805627e-07,
"loss": 36.1738,
"step": 76100
},
{
"epoch": 1.7169896349707074,
"grad_norm": 927.58203125,
"learning_rate": 3.428599615692141e-07,
"loss": 37.3451,
"step": 76200
},
{
"epoch": 1.7192429022082019,
"grad_norm": 693.3057250976562,
"learning_rate": 3.250908359942045e-07,
"loss": 34.0252,
"step": 76300
},
{
"epoch": 1.7214961694456963,
"grad_norm": 597.1045532226562,
"learning_rate": 3.077914851215585e-07,
"loss": 36.9648,
"step": 76400
},
{
"epoch": 1.7237494366831907,
"grad_norm": 1085.2398681640625,
"learning_rate": 2.909622383059835e-07,
"loss": 36.2999,
"step": 76500
},
{
"epoch": 1.726002703920685,
"grad_norm": 890.6967163085938,
"learning_rate": 2.746034159520794e-07,
"loss": 32.9992,
"step": 76600
},
{
"epoch": 1.7282559711581793,
"grad_norm": 722.5303344726562,
"learning_rate": 2.5871532950824394e-07,
"loss": 35.6259,
"step": 76700
},
{
"epoch": 1.7305092383956737,
"grad_norm": 797.7754516601562,
"learning_rate": 2.4329828146074095e-07,
"loss": 38.2101,
"step": 76800
},
{
"epoch": 1.7327625056331681,
"grad_norm": 1722.019775390625,
"learning_rate": 2.283525653279439e-07,
"loss": 34.0617,
"step": 76900
},
{
"epoch": 1.7350157728706623,
"grad_norm": 345.729248046875,
"learning_rate": 2.1387846565474045e-07,
"loss": 34.5921,
"step": 77000
},
{
"epoch": 1.7372690401081567,
"grad_norm": 366.74639892578125,
"learning_rate": 1.998762580071256e-07,
"loss": 33.4238,
"step": 77100
},
{
"epoch": 1.7395223073456512,
"grad_norm": 878.8009643554688,
"learning_rate": 1.8634620896695043e-07,
"loss": 35.3607,
"step": 77200
},
{
"epoch": 1.7417755745831456,
"grad_norm": 2363.9150390625,
"learning_rate": 1.732885761268427e-07,
"loss": 39.3755,
"step": 77300
},
{
"epoch": 1.74402884182064,
"grad_norm": 594.54150390625,
"learning_rate": 1.607036080853136e-07,
"loss": 32.0209,
"step": 77400
},
{
"epoch": 1.7462821090581344,
"grad_norm": 1179.718994140625,
"learning_rate": 1.4859154444200884e-07,
"loss": 35.8264,
"step": 77500
},
{
"epoch": 1.7485353762956286,
"grad_norm": 0.0,
"learning_rate": 1.3695261579316777e-07,
"loss": 34.839,
"step": 77600
},
{
"epoch": 1.750788643533123,
"grad_norm": 662.6306762695312,
"learning_rate": 1.257870437272074e-07,
"loss": 38.8326,
"step": 77700
},
{
"epoch": 1.7530419107706174,
"grad_norm": 451.3083190917969,
"learning_rate": 1.1509504082052869e-07,
"loss": 33.723,
"step": 77800
},
{
"epoch": 1.7552951780081116,
"grad_norm": 844.54833984375,
"learning_rate": 1.0487681063345856e-07,
"loss": 36.3044,
"step": 77900
},
{
"epoch": 1.757548445245606,
"grad_norm": 298.12890625,
"learning_rate": 9.513254770636137e-08,
"loss": 35.3172,
"step": 78000
},
{
"epoch": 1.7598017124831005,
"grad_norm": 899.2721557617188,
"learning_rate": 8.586243755596413e-08,
"loss": 37.8019,
"step": 78100
},
{
"epoch": 1.7620549797205949,
"grad_norm": 302.97686767578125,
"learning_rate": 7.706665667180091e-08,
"loss": 33.4112,
"step": 78200
},
{
"epoch": 1.7643082469580893,
"grad_norm": 1330.9530029296875,
"learning_rate": 6.874537251286006e-08,
"loss": 36.4382,
"step": 78300
},
{
"epoch": 1.7665615141955837,
"grad_norm": 576.2586059570312,
"learning_rate": 6.089874350439506e-08,
"loss": 40.1275,
"step": 78400
},
{
"epoch": 1.768814781433078,
"grad_norm": 1114.9024658203125,
"learning_rate": 5.352691903491303e-08,
"loss": 36.7309,
"step": 78500
},
{
"epoch": 1.7710680486705723,
"grad_norm": 118.37944030761719,
"learning_rate": 4.6630039453327e-08,
"loss": 38.1478,
"step": 78600
},
{
"epoch": 1.7733213159080667,
"grad_norm": 589.9091796875,
"learning_rate": 4.020823606628032e-08,
"loss": 36.3156,
"step": 78700
},
{
"epoch": 1.775574583145561,
"grad_norm": 628.8412475585938,
"learning_rate": 3.426163113565417e-08,
"loss": 37.6031,
"step": 78800
},
{
"epoch": 1.7778278503830554,
"grad_norm": 350.41790771484375,
"learning_rate": 2.879033787623331e-08,
"loss": 34.7334,
"step": 78900
},
{
"epoch": 1.7800811176205498,
"grad_norm": 441.7391357421875,
"learning_rate": 2.3794460453555047e-08,
"loss": 35.6559,
"step": 79000
},
{
"epoch": 1.7823343848580442,
"grad_norm": 370.7310791015625,
"learning_rate": 1.9274093981927478e-08,
"loss": 33.2237,
"step": 79100
},
{
"epoch": 1.7845876520955386,
"grad_norm": 804.0838623046875,
"learning_rate": 1.522932452260595e-08,
"loss": 33.7644,
"step": 79200
},
{
"epoch": 1.786840919333033,
"grad_norm": 804.6845092773438,
"learning_rate": 1.1660229082177676e-08,
"loss": 33.2785,
"step": 79300
},
{
"epoch": 1.7890941865705272,
"grad_norm": 561.6786499023438,
"learning_rate": 8.566875611068504e-09,
"loss": 39.918,
"step": 79400
},
{
"epoch": 1.7913474538080216,
"grad_norm": 894.83544921875,
"learning_rate": 5.94932300227169e-09,
"loss": 35.263,
"step": 79500
},
{
"epoch": 1.793600721045516,
"grad_norm": 0.0,
"learning_rate": 3.807621090218261e-09,
"loss": 36.6426,
"step": 79600
},
{
"epoch": 1.7958539882830102,
"grad_norm": 489.0849914550781,
"learning_rate": 2.1418106498249933e-09,
"loss": 34.9072,
"step": 79700
},
{
"epoch": 1.7981072555205047,
"grad_norm": 893.0217895507812,
"learning_rate": 9.51923395717258e-10,
"loss": 36.0869,
"step": 79800
},
{
"epoch": 1.800360522757999,
"grad_norm": 5594.39306640625,
"learning_rate": 2.379819816378248e-10,
"loss": 33.2752,
"step": 79900
},
{
"epoch": 1.8026137899954935,
"grad_norm": 783.1289672851562,
"learning_rate": 0.0,
"loss": 37.2431,
"step": 80000
}
],
"logging_steps": 100,
"max_steps": 80000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 40000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}