|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.8026137899954935, |
|
"eval_steps": 500, |
|
"global_step": 80000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002253267237494367, |
|
"grad_norm": 388160.65625, |
|
"learning_rate": 6.25e-07, |
|
"loss": 54363.64, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.004506534474988734, |
|
"grad_norm": 168947.046875, |
|
"learning_rate": 1.25e-06, |
|
"loss": 31506.035, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0067598017124831005, |
|
"grad_norm": 1104.2374267578125, |
|
"learning_rate": 1.875e-06, |
|
"loss": 1566.8063, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.009013068949977467, |
|
"grad_norm": 1060.940673828125, |
|
"learning_rate": 2.5e-06, |
|
"loss": 321.4997, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.011266336187471835, |
|
"grad_norm": 558.04296875, |
|
"learning_rate": 3.125e-06, |
|
"loss": 310.3988, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.013519603424966201, |
|
"grad_norm": 1218.569091796875, |
|
"learning_rate": 3.75e-06, |
|
"loss": 295.2427, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.015772870662460567, |
|
"grad_norm": 1162.5400390625, |
|
"learning_rate": 4.375e-06, |
|
"loss": 264.2469, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.018026137899954935, |
|
"grad_norm": 1011.2730102539062, |
|
"learning_rate": 5e-06, |
|
"loss": 251.2306, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.020279405137449302, |
|
"grad_norm": 2534.6201171875, |
|
"learning_rate": 5.625e-06, |
|
"loss": 233.5792, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.02253267237494367, |
|
"grad_norm": 968.279052734375, |
|
"learning_rate": 6.25e-06, |
|
"loss": 190.202, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.024785939612438034, |
|
"grad_norm": 1023.931884765625, |
|
"learning_rate": 6.875000000000001e-06, |
|
"loss": 205.8357, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.027039206849932402, |
|
"grad_norm": 921.0325927734375, |
|
"learning_rate": 7.5e-06, |
|
"loss": 204.0791, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.02929247408742677, |
|
"grad_norm": 2596.747314453125, |
|
"learning_rate": 8.125000000000001e-06, |
|
"loss": 191.1482, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.031545741324921134, |
|
"grad_norm": 7635.25634765625, |
|
"learning_rate": 8.75e-06, |
|
"loss": 202.4382, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.0337990085624155, |
|
"grad_norm": 973.6043701171875, |
|
"learning_rate": 9.375000000000001e-06, |
|
"loss": 189.9945, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.03605227579990987, |
|
"grad_norm": 1417.5162353515625, |
|
"learning_rate": 1e-05, |
|
"loss": 193.7698, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.03830554303740424, |
|
"grad_norm": 1212.5467529296875, |
|
"learning_rate": 1.0625e-05, |
|
"loss": 170.7415, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.040558810274898605, |
|
"grad_norm": 814.8417358398438, |
|
"learning_rate": 1.125e-05, |
|
"loss": 199.9652, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.04281207751239297, |
|
"grad_norm": 2018.5306396484375, |
|
"learning_rate": 1.1875e-05, |
|
"loss": 181.4627, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.04506534474988734, |
|
"grad_norm": 865.38427734375, |
|
"learning_rate": 1.25e-05, |
|
"loss": 178.8928, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0473186119873817, |
|
"grad_norm": 3710.726318359375, |
|
"learning_rate": 1.3125e-05, |
|
"loss": 187.2118, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.04957187922487607, |
|
"grad_norm": 890.4869995117188, |
|
"learning_rate": 1.3750000000000002e-05, |
|
"loss": 175.1732, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.051825146462370436, |
|
"grad_norm": 1558.48876953125, |
|
"learning_rate": 1.4374999999999999e-05, |
|
"loss": 182.1822, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.054078413699864804, |
|
"grad_norm": 899.6353149414062, |
|
"learning_rate": 1.5e-05, |
|
"loss": 174.2233, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.05633168093735917, |
|
"grad_norm": 2251.648193359375, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 171.1752, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.05858494817485354, |
|
"grad_norm": 12215.8701171875, |
|
"learning_rate": 1.6250000000000002e-05, |
|
"loss": 164.0133, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.06083821541234791, |
|
"grad_norm": 9239.3564453125, |
|
"learning_rate": 1.6875000000000004e-05, |
|
"loss": 162.7651, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.06309148264984227, |
|
"grad_norm": 1981.06494140625, |
|
"learning_rate": 1.75e-05, |
|
"loss": 149.4263, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.06534474988733664, |
|
"grad_norm": 1472.9056396484375, |
|
"learning_rate": 1.8125e-05, |
|
"loss": 139.6899, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.067598017124831, |
|
"grad_norm": 840.9261474609375, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 147.1371, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.06985128436232538, |
|
"grad_norm": 1072.093994140625, |
|
"learning_rate": 1.9375e-05, |
|
"loss": 136.6015, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.07210455159981974, |
|
"grad_norm": 1688.5560302734375, |
|
"learning_rate": 2e-05, |
|
"loss": 143.6873, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.0743578188373141, |
|
"grad_norm": 2323.202880859375, |
|
"learning_rate": 2.0625e-05, |
|
"loss": 133.3337, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.07661108607480847, |
|
"grad_norm": 7761.54638671875, |
|
"learning_rate": 2.125e-05, |
|
"loss": 130.355, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.07886435331230283, |
|
"grad_norm": 2193.791015625, |
|
"learning_rate": 2.1875e-05, |
|
"loss": 140.9645, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.08111762054979721, |
|
"grad_norm": 1536.9178466796875, |
|
"learning_rate": 2.25e-05, |
|
"loss": 123.8844, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.08337088778729157, |
|
"grad_norm": 1548.496826171875, |
|
"learning_rate": 2.3125000000000003e-05, |
|
"loss": 128.6075, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.08562415502478594, |
|
"grad_norm": 1170.0714111328125, |
|
"learning_rate": 2.375e-05, |
|
"loss": 126.8377, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.0878774222622803, |
|
"grad_norm": 756.2473754882812, |
|
"learning_rate": 2.4375e-05, |
|
"loss": 125.7238, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.09013068949977468, |
|
"grad_norm": 456.4137268066406, |
|
"learning_rate": 2.5e-05, |
|
"loss": 120.9195, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.09238395673726904, |
|
"grad_norm": 2030.7421875, |
|
"learning_rate": 2.5625e-05, |
|
"loss": 115.6586, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.0946372239747634, |
|
"grad_norm": 7973.09765625, |
|
"learning_rate": 2.625e-05, |
|
"loss": 117.5762, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.09689049121225778, |
|
"grad_norm": 950.62060546875, |
|
"learning_rate": 2.6875e-05, |
|
"loss": 103.6103, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.09914375844975214, |
|
"grad_norm": 825.5482177734375, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 117.7064, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.10139702568724651, |
|
"grad_norm": 4125.6982421875, |
|
"learning_rate": 2.8125000000000003e-05, |
|
"loss": 123.1462, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.10365029292474087, |
|
"grad_norm": 5001.68212890625, |
|
"learning_rate": 2.8749999999999997e-05, |
|
"loss": 115.605, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.10590356016223525, |
|
"grad_norm": 1243.29150390625, |
|
"learning_rate": 2.9375000000000003e-05, |
|
"loss": 117.9468, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.10815682739972961, |
|
"grad_norm": 1567.5623779296875, |
|
"learning_rate": 3e-05, |
|
"loss": 108.8766, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.11041009463722397, |
|
"grad_norm": 623.7001953125, |
|
"learning_rate": 3.0625000000000006e-05, |
|
"loss": 112.1779, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.11266336187471834, |
|
"grad_norm": 923.6962890625, |
|
"learning_rate": 3.125e-05, |
|
"loss": 103.1436, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.1149166291122127, |
|
"grad_norm": 1227.22802734375, |
|
"learning_rate": 3.1875e-05, |
|
"loss": 104.1329, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.11716989634970708, |
|
"grad_norm": 1491.0849609375, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 97.9254, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.11942316358720144, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.3125e-05, |
|
"loss": 102.5234, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.12167643082469581, |
|
"grad_norm": 1849.3809814453125, |
|
"learning_rate": 3.375000000000001e-05, |
|
"loss": 106.2018, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.12392969806219017, |
|
"grad_norm": 1325.9857177734375, |
|
"learning_rate": 3.4375e-05, |
|
"loss": 109.5124, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.12618296529968454, |
|
"grad_norm": 1684.9053955078125, |
|
"learning_rate": 3.5e-05, |
|
"loss": 100.4184, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.1284362325371789, |
|
"grad_norm": 2124.258544921875, |
|
"learning_rate": 3.5625000000000005e-05, |
|
"loss": 106.471, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.13068949977467328, |
|
"grad_norm": 771.2640380859375, |
|
"learning_rate": 3.625e-05, |
|
"loss": 102.0049, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.13294276701216765, |
|
"grad_norm": 822.8856811523438, |
|
"learning_rate": 3.6875e-05, |
|
"loss": 105.0164, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.135196034249662, |
|
"grad_norm": 1594.832763671875, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 95.0338, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.13744930148715637, |
|
"grad_norm": 2976.5205078125, |
|
"learning_rate": 3.8125e-05, |
|
"loss": 92.8634, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.13970256872465076, |
|
"grad_norm": 1695.48681640625, |
|
"learning_rate": 3.875e-05, |
|
"loss": 91.5781, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.14195583596214512, |
|
"grad_norm": 990.6913452148438, |
|
"learning_rate": 3.9375e-05, |
|
"loss": 90.6831, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.14420910319963948, |
|
"grad_norm": 1220.7005615234375, |
|
"learning_rate": 4e-05, |
|
"loss": 91.974, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.14646237043713384, |
|
"grad_norm": 4140.04345703125, |
|
"learning_rate": 4.0625000000000005e-05, |
|
"loss": 86.4978, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.1487156376746282, |
|
"grad_norm": 2063.628173828125, |
|
"learning_rate": 4.125e-05, |
|
"loss": 93.2286, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.1509689049121226, |
|
"grad_norm": 1446.00439453125, |
|
"learning_rate": 4.1875e-05, |
|
"loss": 85.1075, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.15322217214961695, |
|
"grad_norm": 1431.2410888671875, |
|
"learning_rate": 4.25e-05, |
|
"loss": 90.0661, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.1554754393871113, |
|
"grad_norm": 4932.36572265625, |
|
"learning_rate": 4.3125000000000005e-05, |
|
"loss": 90.6297, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.15772870662460567, |
|
"grad_norm": 765.8735961914062, |
|
"learning_rate": 4.375e-05, |
|
"loss": 80.5397, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.15998197386210006, |
|
"grad_norm": 4119.40673828125, |
|
"learning_rate": 4.4375e-05, |
|
"loss": 81.6614, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.16223524109959442, |
|
"grad_norm": 1764.3875732421875, |
|
"learning_rate": 4.5e-05, |
|
"loss": 88.7843, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.16448850833708878, |
|
"grad_norm": 762.2324829101562, |
|
"learning_rate": 4.5625e-05, |
|
"loss": 82.4272, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.16674177557458314, |
|
"grad_norm": 1424.8287353515625, |
|
"learning_rate": 4.6250000000000006e-05, |
|
"loss": 90.781, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.1689950428120775, |
|
"grad_norm": 1720.607421875, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 83.0496, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.1712483100495719, |
|
"grad_norm": 2539.0263671875, |
|
"learning_rate": 4.75e-05, |
|
"loss": 79.4029, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.17350157728706625, |
|
"grad_norm": 6733.1591796875, |
|
"learning_rate": 4.8125000000000004e-05, |
|
"loss": 80.8961, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.1757548445245606, |
|
"grad_norm": 3785.265869140625, |
|
"learning_rate": 4.875e-05, |
|
"loss": 77.2641, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.17800811176205497, |
|
"grad_norm": 820.4760131835938, |
|
"learning_rate": 4.937500000000001e-05, |
|
"loss": 82.2043, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.18026137899954936, |
|
"grad_norm": 717.660400390625, |
|
"learning_rate": 5e-05, |
|
"loss": 80.1946, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.18251464623704372, |
|
"grad_norm": 1224.3692626953125, |
|
"learning_rate": 4.999976201801837e-05, |
|
"loss": 79.4519, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.18476791347453808, |
|
"grad_norm": 974.4324951171875, |
|
"learning_rate": 4.999904807660428e-05, |
|
"loss": 75.99, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.18702118071203244, |
|
"grad_norm": 3050.068359375, |
|
"learning_rate": 4.999785818935018e-05, |
|
"loss": 76.9602, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.1892744479495268, |
|
"grad_norm": 2585.671630859375, |
|
"learning_rate": 4.9996192378909786e-05, |
|
"loss": 76.214, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.1915277151870212, |
|
"grad_norm": 1460.472412109375, |
|
"learning_rate": 4.999405067699773e-05, |
|
"loss": 77.366, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.19378098242451555, |
|
"grad_norm": 1475.039306640625, |
|
"learning_rate": 4.999143312438893e-05, |
|
"loss": 68.2295, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.1960342496620099, |
|
"grad_norm": 1217.306396484375, |
|
"learning_rate": 4.9988339770917825e-05, |
|
"loss": 75.2926, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.19828751689950427, |
|
"grad_norm": 1172.7359619140625, |
|
"learning_rate": 4.99847706754774e-05, |
|
"loss": 73.9257, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.20054078413699863, |
|
"grad_norm": 1494.612548828125, |
|
"learning_rate": 4.9980725906018074e-05, |
|
"loss": 73.2696, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.20279405137449302, |
|
"grad_norm": 698.787353515625, |
|
"learning_rate": 4.997620553954645e-05, |
|
"loss": 75.0695, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.20504731861198738, |
|
"grad_norm": 1705.808349609375, |
|
"learning_rate": 4.997120966212377e-05, |
|
"loss": 69.9942, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.20730058584948174, |
|
"grad_norm": 1201.5196533203125, |
|
"learning_rate": 4.996573836886435e-05, |
|
"loss": 72.2949, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.2095538530869761, |
|
"grad_norm": 658.273193359375, |
|
"learning_rate": 4.995979176393372e-05, |
|
"loss": 72.9935, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.2118071203244705, |
|
"grad_norm": 1103.2303466796875, |
|
"learning_rate": 4.9953369960546676e-05, |
|
"loss": 80.898, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.21406038756196485, |
|
"grad_norm": 872.4283447265625, |
|
"learning_rate": 4.994647308096509e-05, |
|
"loss": 70.9107, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.21631365479945922, |
|
"grad_norm": 2087.058349609375, |
|
"learning_rate": 4.993910125649561e-05, |
|
"loss": 73.337, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.21856692203695358, |
|
"grad_norm": 679.9100952148438, |
|
"learning_rate": 4.9931254627487145e-05, |
|
"loss": 62.9499, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.22082018927444794, |
|
"grad_norm": 2349.733154296875, |
|
"learning_rate": 4.99229333433282e-05, |
|
"loss": 78.2534, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.22307345651194233, |
|
"grad_norm": 942.2659912109375, |
|
"learning_rate": 4.9914137562444044e-05, |
|
"loss": 72.2439, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.22532672374943669, |
|
"grad_norm": 4470.63916015625, |
|
"learning_rate": 4.990486745229364e-05, |
|
"loss": 70.3106, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.22757999098693105, |
|
"grad_norm": 1237.9471435546875, |
|
"learning_rate": 4.989512318936655e-05, |
|
"loss": 71.3457, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.2298332582244254, |
|
"grad_norm": 1329.12060546875, |
|
"learning_rate": 4.988490495917947e-05, |
|
"loss": 67.5981, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.2320865254619198, |
|
"grad_norm": 935.1795654296875, |
|
"learning_rate": 4.987421295627279e-05, |
|
"loss": 76.1853, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.23433979269941416, |
|
"grad_norm": 1066.6749267578125, |
|
"learning_rate": 4.9863047384206835e-05, |
|
"loss": 71.3548, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.23659305993690852, |
|
"grad_norm": 1230.3519287109375, |
|
"learning_rate": 4.985140845555799e-05, |
|
"loss": 74.292, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.23884632717440288, |
|
"grad_norm": 1099.618408203125, |
|
"learning_rate": 4.983929639191469e-05, |
|
"loss": 69.2547, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.24109959441189724, |
|
"grad_norm": 1077.372314453125, |
|
"learning_rate": 4.982671142387316e-05, |
|
"loss": 65.676, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.24335286164939163, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.9813653791033057e-05, |
|
"loss": 69.037, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.245606128886886, |
|
"grad_norm": 923.2297973632812, |
|
"learning_rate": 4.980012374199288e-05, |
|
"loss": 62.7163, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.24785939612438035, |
|
"grad_norm": 672.335693359375, |
|
"learning_rate": 4.9786121534345265e-05, |
|
"loss": 64.182, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.2501126633618747, |
|
"grad_norm": 2046.09326171875, |
|
"learning_rate": 4.977164743467206e-05, |
|
"loss": 69.5933, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.25236593059936907, |
|
"grad_norm": 466.8750305175781, |
|
"learning_rate": 4.975670171853926e-05, |
|
"loss": 68.209, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.25461919783686343, |
|
"grad_norm": 1894.697998046875, |
|
"learning_rate": 4.974128467049176e-05, |
|
"loss": 67.2639, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.2568724650743578, |
|
"grad_norm": 1386.5361328125, |
|
"learning_rate": 4.9725396584047925e-05, |
|
"loss": 69.1241, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.2591257323118522, |
|
"grad_norm": 2561.32275390625, |
|
"learning_rate": 4.970903776169402e-05, |
|
"loss": 68.5936, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.26137899954934657, |
|
"grad_norm": 1006.3966064453125, |
|
"learning_rate": 4.9692208514878444e-05, |
|
"loss": 63.8572, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.26363226678684093, |
|
"grad_norm": 1034.1533203125, |
|
"learning_rate": 4.96749091640058e-05, |
|
"loss": 60.131, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.2658855340243353, |
|
"grad_norm": 468.8936462402344, |
|
"learning_rate": 4.965714003843079e-05, |
|
"loss": 62.9078, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.26813880126182965, |
|
"grad_norm": 545.410888671875, |
|
"learning_rate": 4.9638901476451946e-05, |
|
"loss": 66.2326, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.270392068499324, |
|
"grad_norm": 4246.572265625, |
|
"learning_rate": 4.962019382530521e-05, |
|
"loss": 66.3811, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.2726453357368184, |
|
"grad_norm": 1565.358642578125, |
|
"learning_rate": 4.960101744115727e-05, |
|
"loss": 64.098, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.27489860297431273, |
|
"grad_norm": 949.9479370117188, |
|
"learning_rate": 4.958137268909887e-05, |
|
"loss": 63.714, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.2771518702118071, |
|
"grad_norm": 1055.019775390625, |
|
"learning_rate": 4.956125994313774e-05, |
|
"loss": 69.4046, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.2794051374493015, |
|
"grad_norm": 2204.83935546875, |
|
"learning_rate": 4.9540679586191605e-05, |
|
"loss": 62.6045, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.28165840468679587, |
|
"grad_norm": 607.0169067382812, |
|
"learning_rate": 4.951963201008076e-05, |
|
"loss": 62.7449, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.28391167192429023, |
|
"grad_norm": 1606.60888671875, |
|
"learning_rate": 4.949811761552074e-05, |
|
"loss": 64.348, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.2861649391617846, |
|
"grad_norm": 1324.9617919921875, |
|
"learning_rate": 4.94761368121146e-05, |
|
"loss": 63.6378, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.28841820639927895, |
|
"grad_norm": 614.9827270507812, |
|
"learning_rate": 4.9453690018345144e-05, |
|
"loss": 64.5319, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.2906714736367733, |
|
"grad_norm": 620.3219604492188, |
|
"learning_rate": 4.943077766156697e-05, |
|
"loss": 60.6608, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.2929247408742677, |
|
"grad_norm": 1146.55126953125, |
|
"learning_rate": 4.940740017799833e-05, |
|
"loss": 60.1688, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.29517800811176204, |
|
"grad_norm": 459.2867126464844, |
|
"learning_rate": 4.938355801271282e-05, |
|
"loss": 67.1272, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.2974312753492564, |
|
"grad_norm": 1162.4820556640625, |
|
"learning_rate": 4.9359251619630886e-05, |
|
"loss": 59.3066, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.2996845425867508, |
|
"grad_norm": 718.1458129882812, |
|
"learning_rate": 4.9334481461511215e-05, |
|
"loss": 62.1788, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.3019378098242452, |
|
"grad_norm": 1042.899169921875, |
|
"learning_rate": 4.9309248009941914e-05, |
|
"loss": 63.2037, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.30419107706173953, |
|
"grad_norm": 350.5531005859375, |
|
"learning_rate": 4.9283551745331534e-05, |
|
"loss": 60.271, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.3064443442992339, |
|
"grad_norm": 622.0228271484375, |
|
"learning_rate": 4.925739315689991e-05, |
|
"loss": 58.0221, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.30869761153672826, |
|
"grad_norm": 588.911865234375, |
|
"learning_rate": 4.9230772742668866e-05, |
|
"loss": 58.5713, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.3109508787742226, |
|
"grad_norm": 1807.9488525390625, |
|
"learning_rate": 4.92036910094527e-05, |
|
"loss": 63.6907, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.313204146011717, |
|
"grad_norm": 1166.9796142578125, |
|
"learning_rate": 4.9176148472848584e-05, |
|
"loss": 68.026, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.31545741324921134, |
|
"grad_norm": 1430.736572265625, |
|
"learning_rate": 4.914814565722671e-05, |
|
"loss": 60.4505, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.3177106804867057, |
|
"grad_norm": 678.8543701171875, |
|
"learning_rate": 4.9119683095720324e-05, |
|
"loss": 58.989, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.3199639477242001, |
|
"grad_norm": 1388.1710205078125, |
|
"learning_rate": 4.909076133021557e-05, |
|
"loss": 61.1404, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.3222172149616945, |
|
"grad_norm": 1086.327392578125, |
|
"learning_rate": 4.906138091134118e-05, |
|
"loss": 60.7471, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.32447048219918884, |
|
"grad_norm": 1210.8642578125, |
|
"learning_rate": 4.9031542398457974e-05, |
|
"loss": 58.4001, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.3267237494366832, |
|
"grad_norm": 2312.1982421875, |
|
"learning_rate": 4.9001246359648224e-05, |
|
"loss": 62.6361, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.32897701667417756, |
|
"grad_norm": 1599.8291015625, |
|
"learning_rate": 4.8970493371704826e-05, |
|
"loss": 56.6325, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.3312302839116719, |
|
"grad_norm": 1875.2379150390625, |
|
"learning_rate": 4.8939284020120363e-05, |
|
"loss": 62.8075, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.3334835511491663, |
|
"grad_norm": 767.9666748046875, |
|
"learning_rate": 4.890761889907589e-05, |
|
"loss": 60.694, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.33573681838666064, |
|
"grad_norm": 827.0344848632812, |
|
"learning_rate": 4.8875498611429674e-05, |
|
"loss": 62.9213, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.337990085624155, |
|
"grad_norm": 536.530029296875, |
|
"learning_rate": 4.884292376870567e-05, |
|
"loss": 63.3655, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.3402433528616494, |
|
"grad_norm": 722.9906005859375, |
|
"learning_rate": 4.8809894991081964e-05, |
|
"loss": 59.6742, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.3424966200991438, |
|
"grad_norm": 514.8473510742188, |
|
"learning_rate": 4.877641290737884e-05, |
|
"loss": 54.7803, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.34474988733663814, |
|
"grad_norm": 504.1208801269531, |
|
"learning_rate": 4.874247815504693e-05, |
|
"loss": 62.341, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.3470031545741325, |
|
"grad_norm": 1017.212646484375, |
|
"learning_rate": 4.8708091380154984e-05, |
|
"loss": 60.4433, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.34925642181162686, |
|
"grad_norm": 1013.2379760742188, |
|
"learning_rate": 4.867325323737765e-05, |
|
"loss": 60.4723, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.3515096890491212, |
|
"grad_norm": 1699.07861328125, |
|
"learning_rate": 4.8637964389982926e-05, |
|
"loss": 58.7195, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.3537629562866156, |
|
"grad_norm": 672.705078125, |
|
"learning_rate": 4.860222550981961e-05, |
|
"loss": 58.9785, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.35601622352410994, |
|
"grad_norm": 1399.3095703125, |
|
"learning_rate": 4.856603727730447e-05, |
|
"loss": 63.84, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.3582694907616043, |
|
"grad_norm": 885.7625122070312, |
|
"learning_rate": 4.852940038140927e-05, |
|
"loss": 61.7109, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.3605227579990987, |
|
"grad_norm": 943.9271240234375, |
|
"learning_rate": 4.849231551964771e-05, |
|
"loss": 57.4755, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.3627760252365931, |
|
"grad_norm": 696.8076782226562, |
|
"learning_rate": 4.8454783398062106e-05, |
|
"loss": 60.5156, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.36502929247408744, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.8416804731209945e-05, |
|
"loss": 57.4523, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.3672825597115818, |
|
"grad_norm": 1032.58154296875, |
|
"learning_rate": 4.83783802421503e-05, |
|
"loss": 59.4625, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.36953582694907616, |
|
"grad_norm": 977.2448120117188, |
|
"learning_rate": 4.8339510662430046e-05, |
|
"loss": 56.669, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.3717890941865705, |
|
"grad_norm": 674.7769165039062, |
|
"learning_rate": 4.830019673206997e-05, |
|
"loss": 56.777, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.3740423614240649, |
|
"grad_norm": 477.216552734375, |
|
"learning_rate": 4.826043919955062e-05, |
|
"loss": 58.6639, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.37629562866155924, |
|
"grad_norm": 350.3633728027344, |
|
"learning_rate": 4.822023882179811e-05, |
|
"loss": 62.8859, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.3785488958990536, |
|
"grad_norm": 2964.301025390625, |
|
"learning_rate": 4.817959636416969e-05, |
|
"loss": 60.454, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.380802163136548, |
|
"grad_norm": 1536.1146240234375, |
|
"learning_rate": 4.813851260043916e-05, |
|
"loss": 59.736, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.3830554303740424, |
|
"grad_norm": 477.4852600097656, |
|
"learning_rate": 4.8096988312782174e-05, |
|
"loss": 61.6154, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.38530869761153674, |
|
"grad_norm": 1278.5770263671875, |
|
"learning_rate": 4.80550242917613e-05, |
|
"loss": 60.3102, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.3875619648490311, |
|
"grad_norm": 345.54974365234375, |
|
"learning_rate": 4.8012621336311016e-05, |
|
"loss": 52.0934, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.38981523208652546, |
|
"grad_norm": 1254.111083984375, |
|
"learning_rate": 4.796978025372246e-05, |
|
"loss": 54.1746, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.3920684993240198, |
|
"grad_norm": 2557.150146484375, |
|
"learning_rate": 4.79265018596281e-05, |
|
"loss": 61.9285, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.3943217665615142, |
|
"grad_norm": 1335.9613037109375, |
|
"learning_rate": 4.788278697798618e-05, |
|
"loss": 54.5102, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.39657503379900855, |
|
"grad_norm": 1397.8980712890625, |
|
"learning_rate": 4.783863644106502e-05, |
|
"loss": 56.5746, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.3988283010365029, |
|
"grad_norm": 1394.6300048828125, |
|
"learning_rate": 4.7794051089427214e-05, |
|
"loss": 62.5688, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.40108156827399727, |
|
"grad_norm": 1244.0089111328125, |
|
"learning_rate": 4.7749031771913584e-05, |
|
"loss": 55.5008, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.4033348355114917, |
|
"grad_norm": 1220.64208984375, |
|
"learning_rate": 4.7703579345627035e-05, |
|
"loss": 58.7905, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.40558810274898605, |
|
"grad_norm": 897.0527954101562, |
|
"learning_rate": 4.765769467591625e-05, |
|
"loss": 57.8451, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.4078413699864804, |
|
"grad_norm": 2359.15380859375, |
|
"learning_rate": 4.761137863635921e-05, |
|
"loss": 55.2025, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.41009463722397477, |
|
"grad_norm": 1171.134765625, |
|
"learning_rate": 4.756463210874652e-05, |
|
"loss": 58.9177, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.41234790446146913, |
|
"grad_norm": 1182.134521484375, |
|
"learning_rate": 4.7517455983064694e-05, |
|
"loss": 58.9359, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.4146011716989635, |
|
"grad_norm": 866.15478515625, |
|
"learning_rate": 4.7469851157479177e-05, |
|
"loss": 60.2409, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.41685443893645785, |
|
"grad_norm": 929.9954833984375, |
|
"learning_rate": 4.742181853831721e-05, |
|
"loss": 59.9906, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.4191077061739522, |
|
"grad_norm": 862.0828247070312, |
|
"learning_rate": 4.737335904005063e-05, |
|
"loss": 53.0986, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.42136097341144657, |
|
"grad_norm": 1020.1170654296875, |
|
"learning_rate": 4.732447358527843e-05, |
|
"loss": 53.5393, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.423614240648941, |
|
"grad_norm": 963.4601440429688, |
|
"learning_rate": 4.72751631047092e-05, |
|
"loss": 53.7475, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.42586750788643535, |
|
"grad_norm": 935.1727294921875, |
|
"learning_rate": 4.722542853714341e-05, |
|
"loss": 58.3295, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.4281207751239297, |
|
"grad_norm": 1638.548828125, |
|
"learning_rate": 4.717527082945554e-05, |
|
"loss": 54.7367, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.43037404236142407, |
|
"grad_norm": 1193.1241455078125, |
|
"learning_rate": 4.712469093657605e-05, |
|
"loss": 55.434, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.43262730959891843, |
|
"grad_norm": 1653.6728515625, |
|
"learning_rate": 4.707368982147318e-05, |
|
"loss": 59.1439, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.4348805768364128, |
|
"grad_norm": 880.736572265625, |
|
"learning_rate": 4.7022268455134646e-05, |
|
"loss": 55.1307, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.43713384407390715, |
|
"grad_norm": 540.3501586914062, |
|
"learning_rate": 4.697042781654913e-05, |
|
"loss": 58.1672, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.4393871113114015, |
|
"grad_norm": 1146.242431640625, |
|
"learning_rate": 4.69181688926877e-05, |
|
"loss": 56.7247, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.4416403785488959, |
|
"grad_norm": 761.1096801757812, |
|
"learning_rate": 4.6865492678484895e-05, |
|
"loss": 58.7413, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.4438936457863903, |
|
"grad_norm": 840.3340454101562, |
|
"learning_rate": 4.681240017681993e-05, |
|
"loss": 53.7744, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.44614691302388465, |
|
"grad_norm": 1110.4306640625, |
|
"learning_rate": 4.6758892398497494e-05, |
|
"loss": 55.2062, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.448400180261379, |
|
"grad_norm": 1429.2696533203125, |
|
"learning_rate": 4.670497036222856e-05, |
|
"loss": 54.0191, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.45065344749887337, |
|
"grad_norm": 1162.6312255859375, |
|
"learning_rate": 4.665063509461097e-05, |
|
"loss": 55.5365, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.45290671473636773, |
|
"grad_norm": 1046.4312744140625, |
|
"learning_rate": 4.659588763010989e-05, |
|
"loss": 56.4798, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.4551599819738621, |
|
"grad_norm": 1190.293212890625, |
|
"learning_rate": 4.6540729011038146e-05, |
|
"loss": 51.9015, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.45741324921135645, |
|
"grad_norm": 1765.5267333984375, |
|
"learning_rate": 4.648516028753632e-05, |
|
"loss": 51.6989, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.4596665164488508, |
|
"grad_norm": 717.52001953125, |
|
"learning_rate": 4.642918251755281e-05, |
|
"loss": 52.7842, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.4619197836863452, |
|
"grad_norm": 621.5160522460938, |
|
"learning_rate": 4.637279676682367e-05, |
|
"loss": 57.9222, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.4641730509238396, |
|
"grad_norm": 817.9072875976562, |
|
"learning_rate": 4.6316004108852305e-05, |
|
"loss": 56.0796, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.46642631816133395, |
|
"grad_norm": 915.669921875, |
|
"learning_rate": 4.6258805624889075e-05, |
|
"loss": 56.7081, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.4686795853988283, |
|
"grad_norm": 1008.6634521484375, |
|
"learning_rate": 4.620120240391065e-05, |
|
"loss": 59.5977, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.4709328526363227, |
|
"grad_norm": 1886.614501953125, |
|
"learning_rate": 4.614319554259934e-05, |
|
"loss": 54.6364, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.47318611987381703, |
|
"grad_norm": 1374.013671875, |
|
"learning_rate": 4.608478614532215e-05, |
|
"loss": 56.6813, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.4754393871113114, |
|
"grad_norm": 615.3844604492188, |
|
"learning_rate": 4.602597532410981e-05, |
|
"loss": 53.3524, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.47769265434880576, |
|
"grad_norm": 2033.61962890625, |
|
"learning_rate": 4.5966764198635606e-05, |
|
"loss": 53.9897, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.4799459215863001, |
|
"grad_norm": 2901.83642578125, |
|
"learning_rate": 4.5907153896193985e-05, |
|
"loss": 51.6291, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.4821991888237945, |
|
"grad_norm": 1315.8509521484375, |
|
"learning_rate": 4.5847145551679206e-05, |
|
"loss": 48.8819, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.4844524560612889, |
|
"grad_norm": 1098.4552001953125, |
|
"learning_rate": 4.5786740307563636e-05, |
|
"loss": 49.5009, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.48670572329878325, |
|
"grad_norm": 1847.3548583984375, |
|
"learning_rate": 4.572593931387604e-05, |
|
"loss": 53.9551, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.4889589905362776, |
|
"grad_norm": 965.4677734375, |
|
"learning_rate": 4.566474372817972e-05, |
|
"loss": 52.8141, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.491212257773772, |
|
"grad_norm": 801.56005859375, |
|
"learning_rate": 4.5603154715550386e-05, |
|
"loss": 56.9722, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.49346552501126634, |
|
"grad_norm": 1328.810546875, |
|
"learning_rate": 4.55411734485541e-05, |
|
"loss": 58.1957, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.4957187922487607, |
|
"grad_norm": 4401.68115234375, |
|
"learning_rate": 4.54788011072248e-05, |
|
"loss": 57.8721, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.49797205948625506, |
|
"grad_norm": 3651.903564453125, |
|
"learning_rate": 4.541603887904198e-05, |
|
"loss": 53.8822, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.5002253267237494, |
|
"grad_norm": 490.0090637207031, |
|
"learning_rate": 4.535288795890798e-05, |
|
"loss": 53.9883, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.5024785939612438, |
|
"grad_norm": 492.57373046875, |
|
"learning_rate": 4.528934954912531e-05, |
|
"loss": 48.5514, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.5047318611987381, |
|
"grad_norm": 377.37811279296875, |
|
"learning_rate": 4.522542485937369e-05, |
|
"loss": 59.0365, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.5069851284362326, |
|
"grad_norm": 2599.677001953125, |
|
"learning_rate": 4.516111510668707e-05, |
|
"loss": 55.2056, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.5092383956737269, |
|
"grad_norm": 646.6531372070312, |
|
"learning_rate": 4.509642151543043e-05, |
|
"loss": 55.0129, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.5114916629112213, |
|
"grad_norm": 639.3536987304688, |
|
"learning_rate": 4.503134531727652e-05, |
|
"loss": 56.7601, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.5137449301487156, |
|
"grad_norm": 991.4733276367188, |
|
"learning_rate": 4.496588775118232e-05, |
|
"loss": 54.7894, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.51599819738621, |
|
"grad_norm": 577.932373046875, |
|
"learning_rate": 4.490005006336555e-05, |
|
"loss": 55.8032, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.5182514646237044, |
|
"grad_norm": 973.028564453125, |
|
"learning_rate": 4.4833833507280884e-05, |
|
"loss": 55.5173, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.5205047318611987, |
|
"grad_norm": 1144.3153076171875, |
|
"learning_rate": 4.476723934359609e-05, |
|
"loss": 54.9895, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.5227579990986931, |
|
"grad_norm": 1019.5435180664062, |
|
"learning_rate": 4.4700268840168045e-05, |
|
"loss": 52.9062, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.5250112663361874, |
|
"grad_norm": 874.452880859375, |
|
"learning_rate": 4.463292327201862e-05, |
|
"loss": 52.9759, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.5272645335736819, |
|
"grad_norm": 578.9571533203125, |
|
"learning_rate": 4.456520392131035e-05, |
|
"loss": 49.5633, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.5295178008111762, |
|
"grad_norm": 1387.6461181640625, |
|
"learning_rate": 4.4497112077322044e-05, |
|
"loss": 53.8476, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.5317710680486706, |
|
"grad_norm": 5335.48681640625, |
|
"learning_rate": 4.442864903642428e-05, |
|
"loss": 54.16, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.5340243352861649, |
|
"grad_norm": 1112.384033203125, |
|
"learning_rate": 4.435981610205464e-05, |
|
"loss": 53.531, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.5362776025236593, |
|
"grad_norm": 1009.0767822265625, |
|
"learning_rate": 4.4290614584693004e-05, |
|
"loss": 52.0493, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.5385308697611537, |
|
"grad_norm": 395.3855285644531, |
|
"learning_rate": 4.4221045801836494e-05, |
|
"loss": 49.3474, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.540784136998648, |
|
"grad_norm": 785.1640014648438, |
|
"learning_rate": 4.415111107797445e-05, |
|
"loss": 54.9418, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.5430374042361424, |
|
"grad_norm": 650.4179077148438, |
|
"learning_rate": 4.408081174456322e-05, |
|
"loss": 53.81, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.5452906714736367, |
|
"grad_norm": 1124.389892578125, |
|
"learning_rate": 4.401014914000078e-05, |
|
"loss": 53.7556, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.5475439387111312, |
|
"grad_norm": 891.2913818359375, |
|
"learning_rate": 4.393912460960124e-05, |
|
"loss": 49.5907, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.5497972059486255, |
|
"grad_norm": 1058.59228515625, |
|
"learning_rate": 4.386773950556931e-05, |
|
"loss": 55.9397, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.5520504731861199, |
|
"grad_norm": 1821.5123291015625, |
|
"learning_rate": 4.379599518697444e-05, |
|
"loss": 53.1642, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.5543037404236142, |
|
"grad_norm": 3784.1845703125, |
|
"learning_rate": 4.372389301972506e-05, |
|
"loss": 54.2371, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.5565570076611086, |
|
"grad_norm": 1150.9434814453125, |
|
"learning_rate": 4.3651434376542486e-05, |
|
"loss": 53.3396, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.558810274898603, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.357862063693486e-05, |
|
"loss": 47.3, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.5610635421360973, |
|
"grad_norm": 7060.201171875, |
|
"learning_rate": 4.3505453187170805e-05, |
|
"loss": 54.3783, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.5633168093735917, |
|
"grad_norm": 751.5445556640625, |
|
"learning_rate": 4.34319334202531e-05, |
|
"loss": 57.37, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.565570076611086, |
|
"grad_norm": 378.6798095703125, |
|
"learning_rate": 4.335806273589214e-05, |
|
"loss": 49.7573, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.5678233438485805, |
|
"grad_norm": 669.6986083984375, |
|
"learning_rate": 4.3283842540479264e-05, |
|
"loss": 50.9783, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.5700766110860748, |
|
"grad_norm": 737.0020751953125, |
|
"learning_rate": 4.3209274247060004e-05, |
|
"loss": 52.4549, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.5723298783235692, |
|
"grad_norm": 626.1477661132812, |
|
"learning_rate": 4.313435927530719e-05, |
|
"loss": 54.6302, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.5745831455610635, |
|
"grad_norm": 866.2355346679688, |
|
"learning_rate": 4.305909905149389e-05, |
|
"loss": 53.9909, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.5768364127985579, |
|
"grad_norm": 327.4191589355469, |
|
"learning_rate": 4.2983495008466276e-05, |
|
"loss": 53.452, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.5790896800360523, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.290754858561637e-05, |
|
"loss": 51.8147, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.5813429472735466, |
|
"grad_norm": 1892.76416015625, |
|
"learning_rate": 4.2831261228854544e-05, |
|
"loss": 52.1455, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.583596214511041, |
|
"grad_norm": 536.7318115234375, |
|
"learning_rate": 4.275463439058214e-05, |
|
"loss": 53.9379, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.5858494817485354, |
|
"grad_norm": 750.3458251953125, |
|
"learning_rate": 4.267766952966369e-05, |
|
"loss": 47.4762, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.5881027489860298, |
|
"grad_norm": 730.2880249023438, |
|
"learning_rate": 4.260036811139921e-05, |
|
"loss": 54.3805, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.5903560162235241, |
|
"grad_norm": 600.27392578125, |
|
"learning_rate": 4.2522731607496275e-05, |
|
"loss": 56.3112, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.5926092834610185, |
|
"grad_norm": 919.752197265625, |
|
"learning_rate": 4.244476149604201e-05, |
|
"loss": 48.9764, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.5948625506985128, |
|
"grad_norm": 3348.833251953125, |
|
"learning_rate": 4.2366459261474933e-05, |
|
"loss": 48.8116, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.5971158179360072, |
|
"grad_norm": 438.4606628417969, |
|
"learning_rate": 4.228782639455674e-05, |
|
"loss": 56.4488, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.5993690851735016, |
|
"grad_norm": 654.3180541992188, |
|
"learning_rate": 4.220886439234385e-05, |
|
"loss": 48.5728, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.6016223524109959, |
|
"grad_norm": 1312.3807373046875, |
|
"learning_rate": 4.212957475815898e-05, |
|
"loss": 51.3269, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.6038756196484903, |
|
"grad_norm": 869.6898803710938, |
|
"learning_rate": 4.2049959001562464e-05, |
|
"loss": 54.4109, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.6061288868859847, |
|
"grad_norm": 379.0628662109375, |
|
"learning_rate": 4.197001863832355e-05, |
|
"loss": 53.1673, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.6083821541234791, |
|
"grad_norm": 569.8794555664062, |
|
"learning_rate": 4.188975519039151e-05, |
|
"loss": 48.3691, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.6106354213609734, |
|
"grad_norm": 883.134765625, |
|
"learning_rate": 4.18091701858667e-05, |
|
"loss": 47.1612, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.6128886885984678, |
|
"grad_norm": 1318.8486328125, |
|
"learning_rate": 4.172826515897146e-05, |
|
"loss": 51.9336, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.6151419558359621, |
|
"grad_norm": 777.8239135742188, |
|
"learning_rate": 4.164704165002086e-05, |
|
"loss": 50.7255, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.6173952230734565, |
|
"grad_norm": 704.6067504882812, |
|
"learning_rate": 4.1565501205393445e-05, |
|
"loss": 51.1446, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.6196484903109509, |
|
"grad_norm": 649.2020263671875, |
|
"learning_rate": 4.148364537750172e-05, |
|
"loss": 53.8287, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.6219017575484452, |
|
"grad_norm": 3303.88720703125, |
|
"learning_rate": 4.140147572476268e-05, |
|
"loss": 52.5814, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.6241550247859396, |
|
"grad_norm": 750.2407836914062, |
|
"learning_rate": 4.131899381156806e-05, |
|
"loss": 52.0253, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.626408292023434, |
|
"grad_norm": 862.5943603515625, |
|
"learning_rate": 4.123620120825459e-05, |
|
"loss": 54.8922, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.6286615592609284, |
|
"grad_norm": 1933.6412353515625, |
|
"learning_rate": 4.11530994910741e-05, |
|
"loss": 50.71, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.6309148264984227, |
|
"grad_norm": 833.4996337890625, |
|
"learning_rate": 4.1069690242163484e-05, |
|
"loss": 50.5416, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.6331680937359171, |
|
"grad_norm": 737.939697265625, |
|
"learning_rate": 4.098597504951462e-05, |
|
"loss": 52.4712, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.6354213609734114, |
|
"grad_norm": 608.831298828125, |
|
"learning_rate": 4.09019555069441e-05, |
|
"loss": 49.1117, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.6376746282109058, |
|
"grad_norm": 450.9925231933594, |
|
"learning_rate": 4.081763321406291e-05, |
|
"loss": 50.1539, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.6399278954484002, |
|
"grad_norm": 1044.352294921875, |
|
"learning_rate": 4.073300977624594e-05, |
|
"loss": 50.8113, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.6421811626858945, |
|
"grad_norm": 847.0576782226562, |
|
"learning_rate": 4.064808680460148e-05, |
|
"loss": 53.6078, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.644434429923389, |
|
"grad_norm": 1176.281005859375, |
|
"learning_rate": 4.0562865915940496e-05, |
|
"loss": 50.8178, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.6466876971608833, |
|
"grad_norm": 1171.6334228515625, |
|
"learning_rate": 4.047734873274586e-05, |
|
"loss": 52.1101, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.6489409643983777, |
|
"grad_norm": 2008.2913818359375, |
|
"learning_rate": 4.039153688314145e-05, |
|
"loss": 51.2683, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.651194231635872, |
|
"grad_norm": 946.8640747070312, |
|
"learning_rate": 4.030543200086123e-05, |
|
"loss": 50.7668, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.6534474988733664, |
|
"grad_norm": 718.6983032226562, |
|
"learning_rate": 4.021903572521802e-05, |
|
"loss": 46.3652, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.6557007661108607, |
|
"grad_norm": 811.4840087890625, |
|
"learning_rate": 4.013234970107236e-05, |
|
"loss": 45.9392, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.6579540333483551, |
|
"grad_norm": 2850.653076171875, |
|
"learning_rate": 4.0045375578801214e-05, |
|
"loss": 45.6765, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.6602073005858495, |
|
"grad_norm": 1648.90087890625, |
|
"learning_rate": 3.995811501426648e-05, |
|
"loss": 55.2959, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.6624605678233438, |
|
"grad_norm": 926.3929443359375, |
|
"learning_rate": 3.9870569668783536e-05, |
|
"loss": 47.1888, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.6647138350608383, |
|
"grad_norm": 1050.4713134765625, |
|
"learning_rate": 3.978274120908956e-05, |
|
"loss": 46.4457, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.6669671022983326, |
|
"grad_norm": 709.8076171875, |
|
"learning_rate": 3.969463130731183e-05, |
|
"loss": 59.8226, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.669220369535827, |
|
"grad_norm": 2119.108154296875, |
|
"learning_rate": 3.9606241640935864e-05, |
|
"loss": 51.6369, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.6714736367733213, |
|
"grad_norm": 861.7589111328125, |
|
"learning_rate": 3.9517573892773494e-05, |
|
"loss": 50.3213, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.6737269040108157, |
|
"grad_norm": 858.5698852539062, |
|
"learning_rate": 3.942862975093085e-05, |
|
"loss": 53.4681, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.67598017124831, |
|
"grad_norm": 1666.9454345703125, |
|
"learning_rate": 3.933941090877615e-05, |
|
"loss": 49.9129, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.6782334384858044, |
|
"grad_norm": 766.8621826171875, |
|
"learning_rate": 3.924991906490758e-05, |
|
"loss": 50.3255, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.6804867057232988, |
|
"grad_norm": 415.349853515625, |
|
"learning_rate": 3.916015592312082e-05, |
|
"loss": 44.5914, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.6827399729607931, |
|
"grad_norm": 939.04443359375, |
|
"learning_rate": 3.907012319237672e-05, |
|
"loss": 48.9593, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.6849932401982876, |
|
"grad_norm": 771.4345703125, |
|
"learning_rate": 3.897982258676867e-05, |
|
"loss": 49.2372, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.6872465074357819, |
|
"grad_norm": 932.6231079101562, |
|
"learning_rate": 3.888925582549006e-05, |
|
"loss": 51.741, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.6894997746732763, |
|
"grad_norm": 637.57470703125, |
|
"learning_rate": 3.879842463280145e-05, |
|
"loss": 52.6971, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.6917530419107706, |
|
"grad_norm": 734.0154418945312, |
|
"learning_rate": 3.870733073799785e-05, |
|
"loss": 50.6578, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.694006309148265, |
|
"grad_norm": 683.8228759765625, |
|
"learning_rate": 3.861597587537568e-05, |
|
"loss": 49.1606, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.6962595763857593, |
|
"grad_norm": 414.70159912109375, |
|
"learning_rate": 3.8524361784199853e-05, |
|
"loss": 46.3476, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.6985128436232537, |
|
"grad_norm": 1327.72265625, |
|
"learning_rate": 3.84324902086706e-05, |
|
"loss": 48.834, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.7007661108607481, |
|
"grad_norm": 370.52392578125, |
|
"learning_rate": 3.834036289789029e-05, |
|
"loss": 46.4221, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.7030193780982424, |
|
"grad_norm": 7749.1181640625, |
|
"learning_rate": 3.824798160583012e-05, |
|
"loss": 49.2808, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.7052726453357369, |
|
"grad_norm": 1081.3465576171875, |
|
"learning_rate": 3.8155348091296736e-05, |
|
"loss": 48.1425, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.7075259125732312, |
|
"grad_norm": 630.7388305664062, |
|
"learning_rate": 3.8062464117898724e-05, |
|
"loss": 50.6762, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.7097791798107256, |
|
"grad_norm": 2894.5537109375, |
|
"learning_rate": 3.796933145401304e-05, |
|
"loss": 52.1551, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.7120324470482199, |
|
"grad_norm": 3425.33642578125, |
|
"learning_rate": 3.787595187275136e-05, |
|
"loss": 45.0564, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 1268.707275390625, |
|
"learning_rate": 3.77823271519263e-05, |
|
"loss": 48.459, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.7165389815232086, |
|
"grad_norm": 1132.0858154296875, |
|
"learning_rate": 3.7688459074017606e-05, |
|
"loss": 46.2624, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.718792248760703, |
|
"grad_norm": 896.9110717773438, |
|
"learning_rate": 3.759434942613816e-05, |
|
"loss": 47.7103, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.7210455159981974, |
|
"grad_norm": 1195.548095703125, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 51.0907, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.7232987832356917, |
|
"grad_norm": 2168.93017578125, |
|
"learning_rate": 3.7405412591880215e-05, |
|
"loss": 49.1653, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.7255520504731862, |
|
"grad_norm": 1866.5477294921875, |
|
"learning_rate": 3.731058900258668e-05, |
|
"loss": 49.1327, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.7278053177106805, |
|
"grad_norm": 624.8327026367188, |
|
"learning_rate": 3.721553103742388e-05, |
|
"loss": 50.1689, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.7300585849481749, |
|
"grad_norm": 974.498779296875, |
|
"learning_rate": 3.712024050615843e-05, |
|
"loss": 50.1442, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.7323118521856692, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.702471922298469e-05, |
|
"loss": 49.697, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.7345651194231636, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.692896900649021e-05, |
|
"loss": 46.7989, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.7368183866606579, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.6832991679621086e-05, |
|
"loss": 46.8958, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.7390716538981523, |
|
"grad_norm": 592.7318115234375, |
|
"learning_rate": 3.673678906964727e-05, |
|
"loss": 46.7422, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.7413249211356467, |
|
"grad_norm": 581.4388427734375, |
|
"learning_rate": 3.6640363008127784e-05, |
|
"loss": 47.2569, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.743578188373141, |
|
"grad_norm": 462.7564392089844, |
|
"learning_rate": 3.654371533087586e-05, |
|
"loss": 47.8859, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.7458314556106355, |
|
"grad_norm": 721.4935302734375, |
|
"learning_rate": 3.644684787792392e-05, |
|
"loss": 48.0441, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.7480847228481298, |
|
"grad_norm": 574.1445922851562, |
|
"learning_rate": 3.634976249348867e-05, |
|
"loss": 49.1217, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.7503379900856242, |
|
"grad_norm": 552.1611938476562, |
|
"learning_rate": 3.625246102593588e-05, |
|
"loss": 46.7031, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.7525912573231185, |
|
"grad_norm": 442.0963439941406, |
|
"learning_rate": 3.615494532774522e-05, |
|
"loss": 47.0946, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.7548445245606129, |
|
"grad_norm": 896.58837890625, |
|
"learning_rate": 3.6057217255475034e-05, |
|
"loss": 45.341, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.7570977917981072, |
|
"grad_norm": 784.356201171875, |
|
"learning_rate": 3.5959278669726935e-05, |
|
"loss": 51.6972, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.7593510590356016, |
|
"grad_norm": 1248.8477783203125, |
|
"learning_rate": 3.586113143511043e-05, |
|
"loss": 48.825, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.761604326273096, |
|
"grad_norm": 875.2767333984375, |
|
"learning_rate": 3.576277742020738e-05, |
|
"loss": 52.262, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.7638575935105903, |
|
"grad_norm": 596.1762084960938, |
|
"learning_rate": 3.566421849753646e-05, |
|
"loss": 52.5734, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.7661108607480848, |
|
"grad_norm": 1226.5533447265625, |
|
"learning_rate": 3.556545654351749e-05, |
|
"loss": 48.9049, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.7683641279855791, |
|
"grad_norm": 863.8980102539062, |
|
"learning_rate": 3.54664934384357e-05, |
|
"loss": 44.9694, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.7706173952230735, |
|
"grad_norm": 1017.0120849609375, |
|
"learning_rate": 3.536733106640598e-05, |
|
"loss": 52.3725, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.7728706624605678, |
|
"grad_norm": 572.596435546875, |
|
"learning_rate": 3.526797131533693e-05, |
|
"loss": 47.3554, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.7751239296980622, |
|
"grad_norm": 397.4549560546875, |
|
"learning_rate": 3.516841607689501e-05, |
|
"loss": 54.4889, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.7773771969355565, |
|
"grad_norm": 1202.6942138671875, |
|
"learning_rate": 3.5068667246468436e-05, |
|
"loss": 49.3039, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.7796304641730509, |
|
"grad_norm": 989.998046875, |
|
"learning_rate": 3.496872672313116e-05, |
|
"loss": 42.5372, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.7818837314105452, |
|
"grad_norm": 1453.26416015625, |
|
"learning_rate": 3.486859640960668e-05, |
|
"loss": 49.7227, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.7841369986480397, |
|
"grad_norm": 416.6725769042969, |
|
"learning_rate": 3.476827821223184e-05, |
|
"loss": 46.2663, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.7863902658855341, |
|
"grad_norm": 889.0680541992188, |
|
"learning_rate": 3.466777404092052e-05, |
|
"loss": 46.2791, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.7886435331230284, |
|
"grad_norm": 573.9443969726562, |
|
"learning_rate": 3.456708580912725e-05, |
|
"loss": 45.8314, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.7908968003605228, |
|
"grad_norm": 747.3670654296875, |
|
"learning_rate": 3.446621543381083e-05, |
|
"loss": 43.1904, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.7931500675980171, |
|
"grad_norm": 884.3372192382812, |
|
"learning_rate": 3.436516483539781e-05, |
|
"loss": 48.3382, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.7954033348355115, |
|
"grad_norm": 690.1305541992188, |
|
"learning_rate": 3.426393593774591e-05, |
|
"loss": 46.8459, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.7976566020730058, |
|
"grad_norm": 3796.224365234375, |
|
"learning_rate": 3.4162530668107434e-05, |
|
"loss": 47.1848, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.7999098693105002, |
|
"grad_norm": 765.8935546875, |
|
"learning_rate": 3.406095095709254e-05, |
|
"loss": 47.8368, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.8021631365479945, |
|
"grad_norm": 1086.65625, |
|
"learning_rate": 3.39591987386325e-05, |
|
"loss": 51.3484, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.804416403785489, |
|
"grad_norm": 507.65338134765625, |
|
"learning_rate": 3.3857275949942893e-05, |
|
"loss": 43.1595, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.8066696710229834, |
|
"grad_norm": 5600.59521484375, |
|
"learning_rate": 3.375518453148669e-05, |
|
"loss": 47.4594, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.8089229382604777, |
|
"grad_norm": 2263.270263671875, |
|
"learning_rate": 3.365292642693732e-05, |
|
"loss": 42.8212, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.8111762054979721, |
|
"grad_norm": 761.8606567382812, |
|
"learning_rate": 3.355050358314172e-05, |
|
"loss": 44.9991, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.8134294727354664, |
|
"grad_norm": 1336.81787109375, |
|
"learning_rate": 3.344791795008318e-05, |
|
"loss": 46.3603, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.8156827399729608, |
|
"grad_norm": 726.6700439453125, |
|
"learning_rate": 3.3345171480844275e-05, |
|
"loss": 48.5755, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.8179360072104551, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.324226613156968e-05, |
|
"loss": 47.3854, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.8201892744479495, |
|
"grad_norm": 441.3335266113281, |
|
"learning_rate": 3.313920386142892e-05, |
|
"loss": 47.508, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.8224425416854438, |
|
"grad_norm": 665.80322265625, |
|
"learning_rate": 3.303598663257904e-05, |
|
"loss": 49.6352, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.8246958089229383, |
|
"grad_norm": 1343.923095703125, |
|
"learning_rate": 3.293261641012731e-05, |
|
"loss": 47.085, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.8269490761604327, |
|
"grad_norm": 524.6836547851562, |
|
"learning_rate": 3.2829095162093734e-05, |
|
"loss": 47.4216, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.829202343397927, |
|
"grad_norm": 1741.5296630859375, |
|
"learning_rate": 3.272542485937369e-05, |
|
"loss": 48.4512, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.8314556106354214, |
|
"grad_norm": 507.85040283203125, |
|
"learning_rate": 3.2621607475700275e-05, |
|
"loss": 45.0441, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.8337088778729157, |
|
"grad_norm": 694.7333374023438, |
|
"learning_rate": 3.251764498760683e-05, |
|
"loss": 45.3751, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.8359621451104101, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.241353937438927e-05, |
|
"loss": 47.7383, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.8382154123479044, |
|
"grad_norm": 535.8335571289062, |
|
"learning_rate": 3.230929261806842e-05, |
|
"loss": 50.2691, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.8404686795853988, |
|
"grad_norm": 533.4066772460938, |
|
"learning_rate": 3.2204906703352236e-05, |
|
"loss": 48.8035, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.8427219468228931, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.210038361759807e-05, |
|
"loss": 39.8506, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.8449752140603876, |
|
"grad_norm": 2554.993408203125, |
|
"learning_rate": 3.1995725350774806e-05, |
|
"loss": 44.7332, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.847228481297882, |
|
"grad_norm": 1027.2034912109375, |
|
"learning_rate": 3.1890933895424976e-05, |
|
"loss": 46.4409, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.8494817485353763, |
|
"grad_norm": 1057.863037109375, |
|
"learning_rate": 3.178601124662686e-05, |
|
"loss": 44.3963, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.8517350157728707, |
|
"grad_norm": 418.9128112792969, |
|
"learning_rate": 3.168095940195642e-05, |
|
"loss": 48.5565, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.853988283010365, |
|
"grad_norm": 894.1998291015625, |
|
"learning_rate": 3.157578036144937e-05, |
|
"loss": 45.6854, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.8562415502478594, |
|
"grad_norm": 2467.48486328125, |
|
"learning_rate": 3.147047612756302e-05, |
|
"loss": 45.3885, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.8584948174853537, |
|
"grad_norm": 807.8389282226562, |
|
"learning_rate": 3.136504870513819e-05, |
|
"loss": 46.2842, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.8607480847228481, |
|
"grad_norm": 1878.1251220703125, |
|
"learning_rate": 3.125950010136104e-05, |
|
"loss": 45.9782, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.8630013519603424, |
|
"grad_norm": 1414.4879150390625, |
|
"learning_rate": 3.115383232572483e-05, |
|
"loss": 46.0757, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.8652546191978369, |
|
"grad_norm": 1302.6700439453125, |
|
"learning_rate": 3.104804738999169e-05, |
|
"loss": 48.1511, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.8675078864353313, |
|
"grad_norm": 810.4852905273438, |
|
"learning_rate": 3.094214730815433e-05, |
|
"loss": 42.2249, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.8697611536728256, |
|
"grad_norm": 1316.186279296875, |
|
"learning_rate": 3.083613409639764e-05, |
|
"loss": 45.1847, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.87201442091032, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.073000977306036e-05, |
|
"loss": 40.2777, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.8742676881478143, |
|
"grad_norm": 574.6009521484375, |
|
"learning_rate": 3.062377635859663e-05, |
|
"loss": 44.6483, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.8765209553853087, |
|
"grad_norm": 723.0553588867188, |
|
"learning_rate": 3.0517435875537536e-05, |
|
"loss": 46.7895, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.878774222622803, |
|
"grad_norm": 626.0262451171875, |
|
"learning_rate": 3.0410990348452573e-05, |
|
"loss": 42.9903, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.8810274898602974, |
|
"grad_norm": 384.9597473144531, |
|
"learning_rate": 3.030444180391116e-05, |
|
"loss": 52.3541, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.8832807570977917, |
|
"grad_norm": 726.4917602539062, |
|
"learning_rate": 3.0197792270443982e-05, |
|
"loss": 43.4684, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.8855340243352862, |
|
"grad_norm": 783.1016235351562, |
|
"learning_rate": 3.0091043778504436e-05, |
|
"loss": 48.3162, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.8877872915727806, |
|
"grad_norm": 709.700439453125, |
|
"learning_rate": 2.9984198360429932e-05, |
|
"loss": 45.9866, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.8900405588102749, |
|
"grad_norm": 766.4548950195312, |
|
"learning_rate": 2.9877258050403212e-05, |
|
"loss": 43.3167, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.8922938260477693, |
|
"grad_norm": 547.8275756835938, |
|
"learning_rate": 2.9770224884413623e-05, |
|
"loss": 45.1045, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.8945470932852636, |
|
"grad_norm": 1067.27197265625, |
|
"learning_rate": 2.966310090021837e-05, |
|
"loss": 45.1934, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.896800360522758, |
|
"grad_norm": 906.0169677734375, |
|
"learning_rate": 2.9555888137303695e-05, |
|
"loss": 46.1919, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.8990536277602523, |
|
"grad_norm": 693.871826171875, |
|
"learning_rate": 2.9448588636846046e-05, |
|
"loss": 42.2423, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.9013068949977467, |
|
"grad_norm": 697.4348754882812, |
|
"learning_rate": 2.9341204441673266e-05, |
|
"loss": 46.1574, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.903560162235241, |
|
"grad_norm": 896.8306274414062, |
|
"learning_rate": 2.9233737596225613e-05, |
|
"loss": 46.1825, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.9058134294727355, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9126190146516942e-05, |
|
"loss": 51.3986, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.9080666967102299, |
|
"grad_norm": 228.38462829589844, |
|
"learning_rate": 2.9018564140095657e-05, |
|
"loss": 43.3596, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.9103199639477242, |
|
"grad_norm": 1101.621826171875, |
|
"learning_rate": 2.8910861626005776e-05, |
|
"loss": 44.0146, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.9125732311852186, |
|
"grad_norm": 913.1014404296875, |
|
"learning_rate": 2.8803084654747918e-05, |
|
"loss": 41.7263, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.9148264984227129, |
|
"grad_norm": 499.08642578125, |
|
"learning_rate": 2.8695235278240272e-05, |
|
"loss": 47.2988, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.9170797656602073, |
|
"grad_norm": 513.2366333007812, |
|
"learning_rate": 2.858731554977948e-05, |
|
"loss": 42.2196, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.9193330328977016, |
|
"grad_norm": 876.3751220703125, |
|
"learning_rate": 2.8479327524001636e-05, |
|
"loss": 43.9035, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.921586300135196, |
|
"grad_norm": 698.6486206054688, |
|
"learning_rate": 2.837127325684308e-05, |
|
"loss": 49.8259, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.9238395673726904, |
|
"grad_norm": 522.728271484375, |
|
"learning_rate": 2.8263154805501297e-05, |
|
"loss": 40.6793, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.9260928346101848, |
|
"grad_norm": 13431.1171875, |
|
"learning_rate": 2.815497422839575e-05, |
|
"loss": 47.0175, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.9283461018476792, |
|
"grad_norm": 406.3808898925781, |
|
"learning_rate": 2.8046733585128687e-05, |
|
"loss": 49.5201, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.9305993690851735, |
|
"grad_norm": 528.2371826171875, |
|
"learning_rate": 2.7938434936445945e-05, |
|
"loss": 46.2839, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.9328526363226679, |
|
"grad_norm": 815.2286376953125, |
|
"learning_rate": 2.7830080344197674e-05, |
|
"loss": 43.2044, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.9351059035601622, |
|
"grad_norm": 547.2005004882812, |
|
"learning_rate": 2.7721671871299116e-05, |
|
"loss": 44.5253, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.9373591707976566, |
|
"grad_norm": 722.04931640625, |
|
"learning_rate": 2.761321158169134e-05, |
|
"loss": 41.0613, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.9396124380351509, |
|
"grad_norm": 583.0802612304688, |
|
"learning_rate": 2.7504701540301907e-05, |
|
"loss": 48.6149, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.9418657052726453, |
|
"grad_norm": 616.2501831054688, |
|
"learning_rate": 2.7396143813005602e-05, |
|
"loss": 45.1471, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.9441189725101397, |
|
"grad_norm": 680.4635620117188, |
|
"learning_rate": 2.7287540466585065e-05, |
|
"loss": 48.1059, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.9463722397476341, |
|
"grad_norm": 526.566162109375, |
|
"learning_rate": 2.717889356869146e-05, |
|
"loss": 41.3846, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.9486255069851285, |
|
"grad_norm": 518.4340209960938, |
|
"learning_rate": 2.7070205187805108e-05, |
|
"loss": 48.1914, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.9508787742226228, |
|
"grad_norm": 3767.502197265625, |
|
"learning_rate": 2.6961477393196126e-05, |
|
"loss": 46.3911, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.9531320414601172, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6852712254884988e-05, |
|
"loss": 45.1576, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.9553853086976115, |
|
"grad_norm": 724.995849609375, |
|
"learning_rate": 2.674391184360313e-05, |
|
"loss": 43.0639, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.9576385759351059, |
|
"grad_norm": 635.5662231445312, |
|
"learning_rate": 2.663507823075358e-05, |
|
"loss": 42.7185, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.9598918431726002, |
|
"grad_norm": 397.24517822265625, |
|
"learning_rate": 2.6526213488371427e-05, |
|
"loss": 42.7067, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.9621451104100947, |
|
"grad_norm": 1213.5648193359375, |
|
"learning_rate": 2.641731968908444e-05, |
|
"loss": 45.1527, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.964398377647589, |
|
"grad_norm": 1316.8245849609375, |
|
"learning_rate": 2.63083989060736e-05, |
|
"loss": 45.5445, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.9666516448850834, |
|
"grad_norm": 1318.21875, |
|
"learning_rate": 2.6199453213033598e-05, |
|
"loss": 44.2066, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.9689049121225778, |
|
"grad_norm": 890.9985961914062, |
|
"learning_rate": 2.6090484684133404e-05, |
|
"loss": 41.8035, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.9711581793600721, |
|
"grad_norm": 480.1272888183594, |
|
"learning_rate": 2.598149539397672e-05, |
|
"loss": 45.4357, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.9734114465975665, |
|
"grad_norm": 566.5352172851562, |
|
"learning_rate": 2.587248741756253e-05, |
|
"loss": 47.8986, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.9756647138350608, |
|
"grad_norm": 2252.18017578125, |
|
"learning_rate": 2.5763462830245572e-05, |
|
"loss": 45.0348, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.9779179810725552, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5654423707696833e-05, |
|
"loss": 46.8718, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.9801712483100495, |
|
"grad_norm": 585.69775390625, |
|
"learning_rate": 2.5545372125864032e-05, |
|
"loss": 43.0343, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.982424515547544, |
|
"grad_norm": 782.6468505859375, |
|
"learning_rate": 2.5436310160932092e-05, |
|
"loss": 44.3432, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.9846777827850383, |
|
"grad_norm": 369.2841796875, |
|
"learning_rate": 2.5327239889283612e-05, |
|
"loss": 41.9938, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.9869310500225327, |
|
"grad_norm": 836.7174072265625, |
|
"learning_rate": 2.521816338745935e-05, |
|
"loss": 44.0726, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.9891843172600271, |
|
"grad_norm": 738.8544921875, |
|
"learning_rate": 2.5109082732118665e-05, |
|
"loss": 45.6463, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.9914375844975214, |
|
"grad_norm": 912.4453735351562, |
|
"learning_rate": 2.5e-05, |
|
"loss": 45.5043, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.9936908517350158, |
|
"grad_norm": 2161.161865234375, |
|
"learning_rate": 2.4890917267881338e-05, |
|
"loss": 43.3618, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.9959441189725101, |
|
"grad_norm": 791.255859375, |
|
"learning_rate": 2.4781836612540657e-05, |
|
"loss": 46.0761, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.9981973862100045, |
|
"grad_norm": 812.304443359375, |
|
"learning_rate": 2.4672760110716394e-05, |
|
"loss": 45.6475, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 112.20081329345703, |
|
"eval_runtime": 881.8427, |
|
"eval_samples_per_second": 22.368, |
|
"eval_steps_per_second": 5.593, |
|
"step": 44380 |
|
}, |
|
{ |
|
"epoch": 1.0004506534474988, |
|
"grad_norm": 418.41058349609375, |
|
"learning_rate": 2.4563689839067913e-05, |
|
"loss": 44.6088, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 1.0027039206849933, |
|
"grad_norm": 484.3685607910156, |
|
"learning_rate": 2.4454627874135974e-05, |
|
"loss": 41.7446, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.0049571879224877, |
|
"grad_norm": 422.8623352050781, |
|
"learning_rate": 2.4345576292303176e-05, |
|
"loss": 37.9431, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 1.0072104551599819, |
|
"grad_norm": 612.1026000976562, |
|
"learning_rate": 2.4236537169754437e-05, |
|
"loss": 40.1997, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 1.0094637223974763, |
|
"grad_norm": 634.6718139648438, |
|
"learning_rate": 2.4127512582437485e-05, |
|
"loss": 38.9043, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 1.0117169896349707, |
|
"grad_norm": 660.1007080078125, |
|
"learning_rate": 2.4018504606023293e-05, |
|
"loss": 40.3333, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 1.0139702568724651, |
|
"grad_norm": 513.296142578125, |
|
"learning_rate": 2.3909515315866605e-05, |
|
"loss": 42.9966, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.0162235241099595, |
|
"grad_norm": 605.056640625, |
|
"learning_rate": 2.3800546786966408e-05, |
|
"loss": 39.9933, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 1.0184767913474537, |
|
"grad_norm": 856.895751953125, |
|
"learning_rate": 2.3691601093926404e-05, |
|
"loss": 37.0164, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 1.0207300585849481, |
|
"grad_norm": 413.656494140625, |
|
"learning_rate": 2.3582680310915558e-05, |
|
"loss": 38.9117, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 1.0229833258224426, |
|
"grad_norm": 1099.9735107421875, |
|
"learning_rate": 2.3473786511628575e-05, |
|
"loss": 38.1765, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 1.025236593059937, |
|
"grad_norm": 899.5122680664062, |
|
"learning_rate": 2.3364921769246423e-05, |
|
"loss": 38.9166, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.0274898602974312, |
|
"grad_norm": 2411.383544921875, |
|
"learning_rate": 2.3256088156396868e-05, |
|
"loss": 38.2463, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 1.0297431275349256, |
|
"grad_norm": 542.501708984375, |
|
"learning_rate": 2.314728774511502e-05, |
|
"loss": 39.2675, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 1.03199639477242, |
|
"grad_norm": 566.046142578125, |
|
"learning_rate": 2.303852260680388e-05, |
|
"loss": 39.1687, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 1.0342496620099144, |
|
"grad_norm": 267.0126953125, |
|
"learning_rate": 2.2929794812194898e-05, |
|
"loss": 41.1642, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 1.0365029292474088, |
|
"grad_norm": 893.4733276367188, |
|
"learning_rate": 2.2821106431308544e-05, |
|
"loss": 41.2794, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.038756196484903, |
|
"grad_norm": 353.1875305175781, |
|
"learning_rate": 2.2712459533414944e-05, |
|
"loss": 41.7903, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 1.0410094637223974, |
|
"grad_norm": 749.4720458984375, |
|
"learning_rate": 2.26038561869944e-05, |
|
"loss": 40.0082, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 1.0432627309598919, |
|
"grad_norm": 488.6062316894531, |
|
"learning_rate": 2.24952984596981e-05, |
|
"loss": 40.3988, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 1.0455159981973863, |
|
"grad_norm": 1090.5692138671875, |
|
"learning_rate": 2.238678841830867e-05, |
|
"loss": 40.7657, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 1.0477692654348805, |
|
"grad_norm": 1349.6646728515625, |
|
"learning_rate": 2.2278328128700893e-05, |
|
"loss": 37.8727, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.0500225326723749, |
|
"grad_norm": 1600.57666015625, |
|
"learning_rate": 2.2169919655802335e-05, |
|
"loss": 41.3573, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 1.0522757999098693, |
|
"grad_norm": 321.0033874511719, |
|
"learning_rate": 2.2061565063554064e-05, |
|
"loss": 38.5797, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 1.0545290671473637, |
|
"grad_norm": 652.0059204101562, |
|
"learning_rate": 2.195326641487132e-05, |
|
"loss": 42.7963, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 1.0567823343848581, |
|
"grad_norm": 325.46575927734375, |
|
"learning_rate": 2.184502577160426e-05, |
|
"loss": 34.0251, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 1.0590356016223523, |
|
"grad_norm": 352.83941650390625, |
|
"learning_rate": 2.173684519449872e-05, |
|
"loss": 38.2834, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.0612888688598467, |
|
"grad_norm": 1244.226318359375, |
|
"learning_rate": 2.1628726743156933e-05, |
|
"loss": 36.7162, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 1.0635421360973412, |
|
"grad_norm": 1361.024658203125, |
|
"learning_rate": 2.1520672475998373e-05, |
|
"loss": 39.9933, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 1.0657954033348356, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.141268445022052e-05, |
|
"loss": 42.092, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 1.0680486705723298, |
|
"grad_norm": 681.4054565429688, |
|
"learning_rate": 2.1304764721759733e-05, |
|
"loss": 42.4679, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 1.0703019378098242, |
|
"grad_norm": 683.2991943359375, |
|
"learning_rate": 2.1196915345252084e-05, |
|
"loss": 37.9367, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.0725552050473186, |
|
"grad_norm": 412.36138916015625, |
|
"learning_rate": 2.1089138373994223e-05, |
|
"loss": 36.7721, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 1.074808472284813, |
|
"grad_norm": 1283.1080322265625, |
|
"learning_rate": 2.0981435859904346e-05, |
|
"loss": 40.7471, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 1.0770617395223074, |
|
"grad_norm": 885.2091064453125, |
|
"learning_rate": 2.087380985348306e-05, |
|
"loss": 40.407, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 1.0793150067598016, |
|
"grad_norm": 1542.7694091796875, |
|
"learning_rate": 2.0766262403774386e-05, |
|
"loss": 36.4963, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 1.081568273997296, |
|
"grad_norm": 827.3414916992188, |
|
"learning_rate": 2.0658795558326743e-05, |
|
"loss": 36.856, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.0838215412347905, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.055141136315396e-05, |
|
"loss": 41.1057, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 1.0860748084722849, |
|
"grad_norm": 460.4079284667969, |
|
"learning_rate": 2.0444111862696314e-05, |
|
"loss": 41.1531, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 1.088328075709779, |
|
"grad_norm": 955.0888061523438, |
|
"learning_rate": 2.0336899099781636e-05, |
|
"loss": 37.0588, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 1.0905813429472735, |
|
"grad_norm": 565.38720703125, |
|
"learning_rate": 2.022977511558638e-05, |
|
"loss": 43.0631, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 1.092834610184768, |
|
"grad_norm": 471.39630126953125, |
|
"learning_rate": 2.0122741949596797e-05, |
|
"loss": 40.6631, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.0950878774222623, |
|
"grad_norm": 2771.78759765625, |
|
"learning_rate": 2.0015801639570074e-05, |
|
"loss": 38.7804, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.0973411446597567, |
|
"grad_norm": 921.8468627929688, |
|
"learning_rate": 1.9908956221495567e-05, |
|
"loss": 39.7282, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 1.099594411897251, |
|
"grad_norm": 722.0336303710938, |
|
"learning_rate": 1.980220772955602e-05, |
|
"loss": 39.0169, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 1.1018476791347454, |
|
"grad_norm": 5098.97900390625, |
|
"learning_rate": 1.9695558196088846e-05, |
|
"loss": 41.0709, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 1.1041009463722398, |
|
"grad_norm": 1300.223876953125, |
|
"learning_rate": 1.958900965154743e-05, |
|
"loss": 43.9169, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.1063542136097342, |
|
"grad_norm": 411.2376403808594, |
|
"learning_rate": 1.9482564124462476e-05, |
|
"loss": 38.9952, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 1.1086074808472284, |
|
"grad_norm": 959.572265625, |
|
"learning_rate": 1.937622364140338e-05, |
|
"loss": 39.8025, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 1.1108607480847228, |
|
"grad_norm": 594.6739501953125, |
|
"learning_rate": 1.9269990226939652e-05, |
|
"loss": 42.7149, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 1.1131140153222172, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9163865903602374e-05, |
|
"loss": 36.2475, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 1.1153672825597116, |
|
"grad_norm": 792.9468994140625, |
|
"learning_rate": 1.9057852691845677e-05, |
|
"loss": 36.3523, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.117620549797206, |
|
"grad_norm": 1249.7642822265625, |
|
"learning_rate": 1.895195261000831e-05, |
|
"loss": 35.0879, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 1.1198738170347002, |
|
"grad_norm": 297.8221740722656, |
|
"learning_rate": 1.8846167674275176e-05, |
|
"loss": 40.7354, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 1.1221270842721947, |
|
"grad_norm": 841.4304809570312, |
|
"learning_rate": 1.874049989863896e-05, |
|
"loss": 38.4855, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 1.124380351509689, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8634951294861808e-05, |
|
"loss": 40.9361, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 1.1266336187471835, |
|
"grad_norm": 553.7052612304688, |
|
"learning_rate": 1.852952387243698e-05, |
|
"loss": 43.6521, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.1288868859846777, |
|
"grad_norm": 441.8327331542969, |
|
"learning_rate": 1.842421963855063e-05, |
|
"loss": 34.7172, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 1.131140153222172, |
|
"grad_norm": 886.4244384765625, |
|
"learning_rate": 1.831904059804358e-05, |
|
"loss": 40.2296, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 1.1333934204596665, |
|
"grad_norm": 3301.468505859375, |
|
"learning_rate": 1.8213988753373146e-05, |
|
"loss": 36.6934, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 1.135646687697161, |
|
"grad_norm": 1075.7626953125, |
|
"learning_rate": 1.8109066104575023e-05, |
|
"loss": 39.7705, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 1.1378999549346553, |
|
"grad_norm": 595.6084594726562, |
|
"learning_rate": 1.80042746492252e-05, |
|
"loss": 38.4987, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.1401532221721495, |
|
"grad_norm": 1291.255859375, |
|
"learning_rate": 1.7899616382401936e-05, |
|
"loss": 42.2248, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 1.142406489409644, |
|
"grad_norm": 431.224853515625, |
|
"learning_rate": 1.779509329664777e-05, |
|
"loss": 39.2028, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 1.1446597566471384, |
|
"grad_norm": 484.91259765625, |
|
"learning_rate": 1.7690707381931583e-05, |
|
"loss": 42.3496, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 1.1469130238846328, |
|
"grad_norm": 987.5538940429688, |
|
"learning_rate": 1.7586460625610728e-05, |
|
"loss": 38.0923, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 1.149166291122127, |
|
"grad_norm": 456.387451171875, |
|
"learning_rate": 1.7482355012393177e-05, |
|
"loss": 37.8689, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.1514195583596214, |
|
"grad_norm": 534.0150146484375, |
|
"learning_rate": 1.737839252429973e-05, |
|
"loss": 40.1989, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 1.1536728255971158, |
|
"grad_norm": 931.9837036132812, |
|
"learning_rate": 1.7274575140626318e-05, |
|
"loss": 36.386, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 1.1559260928346102, |
|
"grad_norm": 689.9551391601562, |
|
"learning_rate": 1.7170904837906265e-05, |
|
"loss": 41.5284, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 1.1581793600721046, |
|
"grad_norm": 1355.0162353515625, |
|
"learning_rate": 1.7067383589872703e-05, |
|
"loss": 40.0137, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 1.1604326273095988, |
|
"grad_norm": 403.37762451171875, |
|
"learning_rate": 1.6964013367420966e-05, |
|
"loss": 38.1865, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.1626858945470933, |
|
"grad_norm": 677.8140869140625, |
|
"learning_rate": 1.686079613857109e-05, |
|
"loss": 40.588, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 1.1649391617845877, |
|
"grad_norm": 815.564697265625, |
|
"learning_rate": 1.6757733868430325e-05, |
|
"loss": 35.5447, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 1.167192429022082, |
|
"grad_norm": 887.0990600585938, |
|
"learning_rate": 1.665482851915573e-05, |
|
"loss": 40.0702, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 1.1694456962595763, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6552082049916825e-05, |
|
"loss": 37.7994, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 1.1716989634970707, |
|
"grad_norm": 908.9335327148438, |
|
"learning_rate": 1.6449496416858284e-05, |
|
"loss": 41.0771, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.1739522307345651, |
|
"grad_norm": 554.3827514648438, |
|
"learning_rate": 1.6347073573062672e-05, |
|
"loss": 38.6849, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 1.1762054979720595, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6244815468513315e-05, |
|
"loss": 37.5018, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 1.178458765209554, |
|
"grad_norm": 500.462158203125, |
|
"learning_rate": 1.6142724050057102e-05, |
|
"loss": 40.3684, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 1.1807120324470481, |
|
"grad_norm": 477.7984924316406, |
|
"learning_rate": 1.6040801261367493e-05, |
|
"loss": 40.3859, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 1.1829652996845426, |
|
"grad_norm": 701.5234375, |
|
"learning_rate": 1.5939049042907462e-05, |
|
"loss": 37.1264, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.185218566922037, |
|
"grad_norm": 368.4427185058594, |
|
"learning_rate": 1.583746933189257e-05, |
|
"loss": 43.4277, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 1.1874718341595314, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5736064062254094e-05, |
|
"loss": 37.9021, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 1.1897251013970256, |
|
"grad_norm": 295.5749816894531, |
|
"learning_rate": 1.56348351646022e-05, |
|
"loss": 37.9324, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 1.19197836863452, |
|
"grad_norm": 732.7645263671875, |
|
"learning_rate": 1.553378456618918e-05, |
|
"loss": 34.4221, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 1.1942316358720144, |
|
"grad_norm": 578.3222045898438, |
|
"learning_rate": 1.5432914190872757e-05, |
|
"loss": 38.2748, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.1964849031095088, |
|
"grad_norm": 1566.3675537109375, |
|
"learning_rate": 1.533222595907949e-05, |
|
"loss": 39.7942, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 1.1987381703470033, |
|
"grad_norm": 1014.3735961914062, |
|
"learning_rate": 1.523172178776816e-05, |
|
"loss": 35.2939, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 1.2009914375844974, |
|
"grad_norm": 851.2781372070312, |
|
"learning_rate": 1.5131403590393323e-05, |
|
"loss": 33.0635, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 1.2032447048219919, |
|
"grad_norm": 855.8948364257812, |
|
"learning_rate": 1.5031273276868845e-05, |
|
"loss": 36.5281, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 1.2054979720594863, |
|
"grad_norm": 746.2517700195312, |
|
"learning_rate": 1.4931332753531574e-05, |
|
"loss": 40.8483, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.2077512392969807, |
|
"grad_norm": 737.263671875, |
|
"learning_rate": 1.4831583923104999e-05, |
|
"loss": 36.7425, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 1.2100045065344749, |
|
"grad_norm": 460.10894775390625, |
|
"learning_rate": 1.4732028684663074e-05, |
|
"loss": 39.4172, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 1.2122577737719693, |
|
"grad_norm": 532.8964233398438, |
|
"learning_rate": 1.463266893359403e-05, |
|
"loss": 39.1485, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 1.2145110410094637, |
|
"grad_norm": 746.1353759765625, |
|
"learning_rate": 1.4533506561564306e-05, |
|
"loss": 39.8182, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 1.2167643082469581, |
|
"grad_norm": 518.3690185546875, |
|
"learning_rate": 1.443454345648252e-05, |
|
"loss": 36.7215, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.2190175754844526, |
|
"grad_norm": 653.6338500976562, |
|
"learning_rate": 1.4335781502463552e-05, |
|
"loss": 36.7699, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 1.2212708427219467, |
|
"grad_norm": 592.44921875, |
|
"learning_rate": 1.4237222579792618e-05, |
|
"loss": 37.2595, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 1.2235241099594412, |
|
"grad_norm": 930.57080078125, |
|
"learning_rate": 1.4138868564889573e-05, |
|
"loss": 36.6723, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 1.2257773771969356, |
|
"grad_norm": 578.7326049804688, |
|
"learning_rate": 1.4040721330273062e-05, |
|
"loss": 41.5042, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 1.22803064443443, |
|
"grad_norm": 803.9358520507812, |
|
"learning_rate": 1.3942782744524973e-05, |
|
"loss": 37.0519, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.2302839116719242, |
|
"grad_norm": 1721.2294921875, |
|
"learning_rate": 1.3845054672254781e-05, |
|
"loss": 38.7933, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 1.2325371789094186, |
|
"grad_norm": 861.869140625, |
|
"learning_rate": 1.3747538974064122e-05, |
|
"loss": 37.8534, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 1.234790446146913, |
|
"grad_norm": 468.365478515625, |
|
"learning_rate": 1.3650237506511331e-05, |
|
"loss": 35.1038, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 1.2370437133844074, |
|
"grad_norm": 570.2977905273438, |
|
"learning_rate": 1.3553152122076079e-05, |
|
"loss": 34.9327, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 1.2392969806219019, |
|
"grad_norm": 1238.723388671875, |
|
"learning_rate": 1.3456284669124158e-05, |
|
"loss": 44.4615, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.241550247859396, |
|
"grad_norm": 559.5399169921875, |
|
"learning_rate": 1.3359636991872215e-05, |
|
"loss": 39.2116, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 1.2438035150968905, |
|
"grad_norm": 436.1620178222656, |
|
"learning_rate": 1.3263210930352737e-05, |
|
"loss": 36.6266, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 1.2460567823343849, |
|
"grad_norm": 354.4382629394531, |
|
"learning_rate": 1.3167008320378918e-05, |
|
"loss": 34.6987, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 1.2483100495718793, |
|
"grad_norm": 1071.3955078125, |
|
"learning_rate": 1.3071030993509788e-05, |
|
"loss": 35.2384, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 1.2505633168093735, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2975280777015314e-05, |
|
"loss": 37.3418, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.252816584046868, |
|
"grad_norm": 1224.337646484375, |
|
"learning_rate": 1.2879759493841575e-05, |
|
"loss": 40.2309, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 1.2550698512843623, |
|
"grad_norm": 1002.5487670898438, |
|
"learning_rate": 1.2784468962576136e-05, |
|
"loss": 36.8461, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 1.2573231185218567, |
|
"grad_norm": 743.8724975585938, |
|
"learning_rate": 1.2689410997413325e-05, |
|
"loss": 40.4685, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 1.2595763857593512, |
|
"grad_norm": 876.872802734375, |
|
"learning_rate": 1.2594587408119804e-05, |
|
"loss": 37.6634, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 1.2618296529968454, |
|
"grad_norm": 2100.144775390625, |
|
"learning_rate": 1.2500000000000006e-05, |
|
"loss": 38.9799, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.2640829202343398, |
|
"grad_norm": 705.1195678710938, |
|
"learning_rate": 1.2405650573861846e-05, |
|
"loss": 39.5435, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 1.2663361874718342, |
|
"grad_norm": 803.4048461914062, |
|
"learning_rate": 1.2311540925982403e-05, |
|
"loss": 38.1768, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 1.2685894547093286, |
|
"grad_norm": 833.8876342773438, |
|
"learning_rate": 1.2217672848073702e-05, |
|
"loss": 39.7547, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 1.2708427219468228, |
|
"grad_norm": 1292.3961181640625, |
|
"learning_rate": 1.2124048127248644e-05, |
|
"loss": 40.0696, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 1.2730959891843172, |
|
"grad_norm": 686.7381591796875, |
|
"learning_rate": 1.2030668545986959e-05, |
|
"loss": 37.1013, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.2753492564218116, |
|
"grad_norm": 899.0304565429688, |
|
"learning_rate": 1.1937535882101281e-05, |
|
"loss": 36.339, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 1.277602523659306, |
|
"grad_norm": 632.4103393554688, |
|
"learning_rate": 1.1844651908703261e-05, |
|
"loss": 39.9224, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 1.2798557908968005, |
|
"grad_norm": 379.15496826171875, |
|
"learning_rate": 1.175201839416988e-05, |
|
"loss": 36.499, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 1.2821090581342947, |
|
"grad_norm": 382.707275390625, |
|
"learning_rate": 1.1659637102109714e-05, |
|
"loss": 36.4978, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 1.284362325371789, |
|
"grad_norm": 616.2396240234375, |
|
"learning_rate": 1.1567509791329401e-05, |
|
"loss": 36.3641, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.2866155926092835, |
|
"grad_norm": 2957.53955078125, |
|
"learning_rate": 1.1475638215800156e-05, |
|
"loss": 35.4105, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 1.288868859846778, |
|
"grad_norm": 676.7122192382812, |
|
"learning_rate": 1.1384024124624324e-05, |
|
"loss": 35.4272, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 1.291122127084272, |
|
"grad_norm": 318.8774719238281, |
|
"learning_rate": 1.1292669262002159e-05, |
|
"loss": 34.4229, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 1.2933753943217665, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1201575367198547e-05, |
|
"loss": 37.1322, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 1.295628661559261, |
|
"grad_norm": 397.2348937988281, |
|
"learning_rate": 1.1110744174509952e-05, |
|
"loss": 38.137, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.2978819287967553, |
|
"grad_norm": 580.2010498046875, |
|
"learning_rate": 1.1020177413231334e-05, |
|
"loss": 37.5998, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 1.3001351960342498, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0929876807623285e-05, |
|
"loss": 34.9743, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 1.302388463271744, |
|
"grad_norm": 1732.2982177734375, |
|
"learning_rate": 1.0839844076879185e-05, |
|
"loss": 37.4842, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 1.3046417305092384, |
|
"grad_norm": 526.9512329101562, |
|
"learning_rate": 1.0750080935092425e-05, |
|
"loss": 35.1266, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 1.3068949977467328, |
|
"grad_norm": 893.0797729492188, |
|
"learning_rate": 1.0660589091223855e-05, |
|
"loss": 35.0203, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.3091482649842272, |
|
"grad_norm": 307.86212158203125, |
|
"learning_rate": 1.0571370249069162e-05, |
|
"loss": 41.4471, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 1.3114015322217214, |
|
"grad_norm": 1008.5663452148438, |
|
"learning_rate": 1.0482426107226507e-05, |
|
"loss": 33.4428, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 1.3136547994592158, |
|
"grad_norm": 1568.9752197265625, |
|
"learning_rate": 1.0393758359064146e-05, |
|
"loss": 38.1394, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 1.3159080666967102, |
|
"grad_norm": 754.859130859375, |
|
"learning_rate": 1.0305368692688174e-05, |
|
"loss": 35.5464, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 1.3181613339342046, |
|
"grad_norm": 412.3022766113281, |
|
"learning_rate": 1.0217258790910448e-05, |
|
"loss": 36.5812, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.320414601171699, |
|
"grad_norm": 726.6887817382812, |
|
"learning_rate": 1.0129430331216471e-05, |
|
"loss": 36.4859, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 1.3226678684091933, |
|
"grad_norm": 463.0475769042969, |
|
"learning_rate": 1.0041884985733524e-05, |
|
"loss": 38.6337, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 1.3249211356466877, |
|
"grad_norm": 782.8141479492188, |
|
"learning_rate": 9.954624421198792e-06, |
|
"loss": 40.0479, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 1.327174402884182, |
|
"grad_norm": 1247.14208984375, |
|
"learning_rate": 9.867650298927645e-06, |
|
"loss": 34.5978, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 1.3294276701216765, |
|
"grad_norm": 1039.3582763671875, |
|
"learning_rate": 9.780964274781984e-06, |
|
"loss": 36.1797, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.3316809373591707, |
|
"grad_norm": 439.5457763671875, |
|
"learning_rate": 9.694567999138765e-06, |
|
"loss": 36.7625, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 1.3339342045966651, |
|
"grad_norm": 960.1097412109375, |
|
"learning_rate": 9.608463116858542e-06, |
|
"loss": 35.3218, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 1.3361874718341595, |
|
"grad_norm": 981.8735961914062, |
|
"learning_rate": 9.522651267254149e-06, |
|
"loss": 38.6512, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 1.338440739071654, |
|
"grad_norm": 382.9591369628906, |
|
"learning_rate": 9.437134084059515e-06, |
|
"loss": 33.843, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 1.3406940063091484, |
|
"grad_norm": 454.0657043457031, |
|
"learning_rate": 9.351913195398524e-06, |
|
"loss": 36.7899, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.3429472735466426, |
|
"grad_norm": 381.1954040527344, |
|
"learning_rate": 9.266990223754069e-06, |
|
"loss": 37.5628, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 1.345200540784137, |
|
"grad_norm": 682.6860961914062, |
|
"learning_rate": 9.1823667859371e-06, |
|
"loss": 37.8172, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 1.3474538080216314, |
|
"grad_norm": 788.8383178710938, |
|
"learning_rate": 9.098044493055899e-06, |
|
"loss": 35.3697, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 1.3497070752591258, |
|
"grad_norm": 854.1769409179688, |
|
"learning_rate": 9.014024950485383e-06, |
|
"loss": 33.7009, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 1.35196034249662, |
|
"grad_norm": 980.5393676757812, |
|
"learning_rate": 8.930309757836517e-06, |
|
"loss": 36.0638, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.3542136097341144, |
|
"grad_norm": 560.2689208984375, |
|
"learning_rate": 8.84690050892591e-06, |
|
"loss": 38.2246, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 1.3564668769716088, |
|
"grad_norm": 987.0377197265625, |
|
"learning_rate": 8.763798791745411e-06, |
|
"loss": 36.7167, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 1.3587201442091033, |
|
"grad_norm": 877.1320190429688, |
|
"learning_rate": 8.681006188431946e-06, |
|
"loss": 31.5632, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 1.3609734114465977, |
|
"grad_norm": 1143.940185546875, |
|
"learning_rate": 8.598524275237322e-06, |
|
"loss": 37.9032, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 1.3632266786840919, |
|
"grad_norm": 394.9543762207031, |
|
"learning_rate": 8.51635462249828e-06, |
|
"loss": 41.1612, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 1.3654799459215863, |
|
"grad_norm": 933.392822265625, |
|
"learning_rate": 8.434498794606568e-06, |
|
"loss": 36.5777, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 1.3677332131590807, |
|
"grad_norm": 958.4544067382812, |
|
"learning_rate": 8.352958349979145e-06, |
|
"loss": 34.62, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 1.3699864803965751, |
|
"grad_norm": 1324.6890869140625, |
|
"learning_rate": 8.271734841028553e-06, |
|
"loss": 36.4317, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 1.3722397476340693, |
|
"grad_norm": 562.975341796875, |
|
"learning_rate": 8.190829814133294e-06, |
|
"loss": 37.9488, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 1.3744930148715637, |
|
"grad_norm": 407.0682067871094, |
|
"learning_rate": 8.110244809608495e-06, |
|
"loss": 35.8329, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.3767462821090581, |
|
"grad_norm": 452.447998046875, |
|
"learning_rate": 8.029981361676456e-06, |
|
"loss": 36.064, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 1.3789995493465526, |
|
"grad_norm": 673.4539794921875, |
|
"learning_rate": 7.950040998437542e-06, |
|
"loss": 35.4829, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 1.381252816584047, |
|
"grad_norm": 802.2340698242188, |
|
"learning_rate": 7.87042524184102e-06, |
|
"loss": 38.9247, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 1.3835060838215412, |
|
"grad_norm": 276.5713806152344, |
|
"learning_rate": 7.791135607656147e-06, |
|
"loss": 36.7328, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 1.3857593510590356, |
|
"grad_norm": 359.2288818359375, |
|
"learning_rate": 7.712173605443269e-06, |
|
"loss": 35.8955, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 1.38801261829653, |
|
"grad_norm": 590.3004150390625, |
|
"learning_rate": 7.633540738525066e-06, |
|
"loss": 36.7346, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 1.3902658855340244, |
|
"grad_norm": 2431.375732421875, |
|
"learning_rate": 7.555238503958001e-06, |
|
"loss": 38.1598, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 1.3925191527715186, |
|
"grad_norm": 847.335205078125, |
|
"learning_rate": 7.477268392503728e-06, |
|
"loss": 35.9027, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 1.394772420009013, |
|
"grad_norm": 543.1801147460938, |
|
"learning_rate": 7.399631888600797e-06, |
|
"loss": 37.6817, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 1.3970256872465074, |
|
"grad_norm": 415.84918212890625, |
|
"learning_rate": 7.3223304703363135e-06, |
|
"loss": 34.3274, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.3992789544840019, |
|
"grad_norm": 777.2684936523438, |
|
"learning_rate": 7.245365609417864e-06, |
|
"loss": 39.1105, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 1.4015322217214963, |
|
"grad_norm": 1126.7650146484375, |
|
"learning_rate": 7.168738771145464e-06, |
|
"loss": 37.2822, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 1.4037854889589905, |
|
"grad_norm": 352.9339904785156, |
|
"learning_rate": 7.092451414383644e-06, |
|
"loss": 39.5989, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 1.4060387561964849, |
|
"grad_norm": 790.8270874023438, |
|
"learning_rate": 7.016504991533726e-06, |
|
"loss": 37.4908, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 1.4082920234339793, |
|
"grad_norm": 1335.3138427734375, |
|
"learning_rate": 6.940900948506113e-06, |
|
"loss": 42.1853, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.4105452906714737, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.865640724692815e-06, |
|
"loss": 37.1607, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 1.412798557908968, |
|
"grad_norm": 466.6181945800781, |
|
"learning_rate": 6.790725752939997e-06, |
|
"loss": 37.1732, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 1.4150518251464623, |
|
"grad_norm": 317.6047668457031, |
|
"learning_rate": 6.716157459520739e-06, |
|
"loss": 39.2635, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 1.4173050923839567, |
|
"grad_norm": 553.5538330078125, |
|
"learning_rate": 6.641937264107867e-06, |
|
"loss": 37.3103, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 1.4195583596214512, |
|
"grad_norm": 282.9200439453125, |
|
"learning_rate": 6.568066579746901e-06, |
|
"loss": 38.9337, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.4218116268589456, |
|
"grad_norm": 1359.1005859375, |
|
"learning_rate": 6.494546812829206e-06, |
|
"loss": 41.63, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 1.4240648940964398, |
|
"grad_norm": 809.4750366210938, |
|
"learning_rate": 6.421379363065142e-06, |
|
"loss": 33.3718, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 1.4263181613339342, |
|
"grad_norm": 259.95654296875, |
|
"learning_rate": 6.348565623457514e-06, |
|
"loss": 39.1779, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 1262.052734375, |
|
"learning_rate": 6.2761069802749455e-06, |
|
"loss": 33.9572, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 1.430824695808923, |
|
"grad_norm": 234.85487365722656, |
|
"learning_rate": 6.204004813025568e-06, |
|
"loss": 40.2383, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 1.4330779630464172, |
|
"grad_norm": 342.09375, |
|
"learning_rate": 6.1322604944307e-06, |
|
"loss": 40.2604, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 1.4353312302839116, |
|
"grad_norm": 1251.5074462890625, |
|
"learning_rate": 6.060875390398757e-06, |
|
"loss": 38.2612, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 1.437584497521406, |
|
"grad_norm": 1840.7529296875, |
|
"learning_rate": 5.989850859999227e-06, |
|
"loss": 35.3611, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 1.4398377647589005, |
|
"grad_norm": 1598.6044921875, |
|
"learning_rate": 5.919188255436778e-06, |
|
"loss": 35.9621, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 1.4420910319963949, |
|
"grad_norm": 492.7054443359375, |
|
"learning_rate": 5.848888922025553e-06, |
|
"loss": 38.0005, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.444344299233889, |
|
"grad_norm": 935.9298095703125, |
|
"learning_rate": 5.778954198163514e-06, |
|
"loss": 37.7181, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 1.4465975664713835, |
|
"grad_norm": 583.2149658203125, |
|
"learning_rate": 5.709385415307006e-06, |
|
"loss": 38.0206, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 1.448850833708878, |
|
"grad_norm": 1340.3944091796875, |
|
"learning_rate": 5.640183897945362e-06, |
|
"loss": 36.6506, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 1.4511041009463723, |
|
"grad_norm": 358.40875244140625, |
|
"learning_rate": 5.571350963575728e-06, |
|
"loss": 36.0538, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 1.4533573681838665, |
|
"grad_norm": 1554.845703125, |
|
"learning_rate": 5.50288792267796e-06, |
|
"loss": 34.25, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 1.455610635421361, |
|
"grad_norm": 1170.0960693359375, |
|
"learning_rate": 5.434796078689652e-06, |
|
"loss": 32.1239, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 1.4578639026588553, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.367076727981382e-06, |
|
"loss": 40.6604, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 1.4601171698963498, |
|
"grad_norm": 417.8546142578125, |
|
"learning_rate": 5.299731159831953e-06, |
|
"loss": 37.095, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 1.4623704371338442, |
|
"grad_norm": 377.0328369140625, |
|
"learning_rate": 5.2327606564039234e-06, |
|
"loss": 35.4373, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 1.4646237043713384, |
|
"grad_norm": 625.896240234375, |
|
"learning_rate": 5.166166492719124e-06, |
|
"loss": 34.6625, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.4668769716088328, |
|
"grad_norm": 1136.8114013671875, |
|
"learning_rate": 5.099949936634451e-06, |
|
"loss": 35.121, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 1.4691302388463272, |
|
"grad_norm": 490.1904296875, |
|
"learning_rate": 5.034112248817685e-06, |
|
"loss": 35.5372, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 1.4713835060838216, |
|
"grad_norm": 299.876953125, |
|
"learning_rate": 4.9686546827234865e-06, |
|
"loss": 36.2584, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 1.4736367733213158, |
|
"grad_norm": 672.1801147460938, |
|
"learning_rate": 4.903578484569568e-06, |
|
"loss": 38.8503, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 1.4758900405588102, |
|
"grad_norm": 506.11322021484375, |
|
"learning_rate": 4.8388848933129335e-06, |
|
"loss": 35.2598, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 1.4781433077963047, |
|
"grad_norm": 347.1400146484375, |
|
"learning_rate": 4.7745751406263165e-06, |
|
"loss": 37.9243, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 1.480396575033799, |
|
"grad_norm": 741.727783203125, |
|
"learning_rate": 4.710650450874693e-06, |
|
"loss": 36.6068, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 1.4826498422712935, |
|
"grad_norm": 490.6405944824219, |
|
"learning_rate": 4.647112041092022e-06, |
|
"loss": 37.8632, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 1.4849031095087877, |
|
"grad_norm": 148.07147216796875, |
|
"learning_rate": 4.583961120958027e-06, |
|
"loss": 39.7858, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 1.487156376746282, |
|
"grad_norm": 1348.078857421875, |
|
"learning_rate": 4.521198892775203e-06, |
|
"loss": 42.7009, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.4894096439837765, |
|
"grad_norm": 207.09417724609375, |
|
"learning_rate": 4.45882655144591e-06, |
|
"loss": 35.0663, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 1.4916629112212707, |
|
"grad_norm": 810.766845703125, |
|
"learning_rate": 4.396845284449608e-06, |
|
"loss": 36.5057, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 1.4939161784587651, |
|
"grad_norm": 2021.21728515625, |
|
"learning_rate": 4.335256271820287e-06, |
|
"loss": 36.8715, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 1.4961694456962595, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.274060686123959e-06, |
|
"loss": 35.6832, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 1.498422712933754, |
|
"grad_norm": 1135.528076171875, |
|
"learning_rate": 4.213259692436367e-06, |
|
"loss": 33.5205, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 1.5006759801712484, |
|
"grad_norm": 950.0307006835938, |
|
"learning_rate": 4.152854448320797e-06, |
|
"loss": 37.6403, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 1.5029292474087428, |
|
"grad_norm": 1010.8804321289062, |
|
"learning_rate": 4.092846103806011e-06, |
|
"loss": 35.893, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 1.5051825146462372, |
|
"grad_norm": 2760.01611328125, |
|
"learning_rate": 4.0332358013644016e-06, |
|
"loss": 40.6062, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 1.5074357818837314, |
|
"grad_norm": 843.011962890625, |
|
"learning_rate": 3.9740246758901895e-06, |
|
"loss": 37.1098, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 1.5096890491212258, |
|
"grad_norm": 1276.917236328125, |
|
"learning_rate": 3.9152138546778625e-06, |
|
"loss": 32.2139, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.51194231635872, |
|
"grad_norm": 1487.1099853515625, |
|
"learning_rate": 3.85680445740067e-06, |
|
"loss": 37.2284, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 1.5141955835962144, |
|
"grad_norm": 953.3679809570312, |
|
"learning_rate": 3.798797596089351e-06, |
|
"loss": 39.7566, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 1.5164488508337088, |
|
"grad_norm": 858.4658813476562, |
|
"learning_rate": 3.741194375110932e-06, |
|
"loss": 35.92, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 1.5187021180712033, |
|
"grad_norm": 309.0785217285156, |
|
"learning_rate": 3.6839958911476957e-06, |
|
"loss": 38.8753, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 1.5209553853086977, |
|
"grad_norm": 1036.7149658203125, |
|
"learning_rate": 3.6272032331763408e-06, |
|
"loss": 38.0908, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 1.523208652546192, |
|
"grad_norm": 620.3539428710938, |
|
"learning_rate": 3.5708174824471947e-06, |
|
"loss": 34.9211, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 1.5254619197836865, |
|
"grad_norm": 447.8617248535156, |
|
"learning_rate": 3.5148397124636826e-06, |
|
"loss": 39.3934, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 1.5277151870211807, |
|
"grad_norm": 517.4603881835938, |
|
"learning_rate": 3.4592709889618545e-06, |
|
"loss": 36.0529, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 1.5299684542586751, |
|
"grad_norm": 872.0263671875, |
|
"learning_rate": 3.4041123698901084e-06, |
|
"loss": 34.7142, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 1.5322217214961693, |
|
"grad_norm": 596.3970947265625, |
|
"learning_rate": 3.3493649053890326e-06, |
|
"loss": 35.7701, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.5344749887336637, |
|
"grad_norm": 1187.1817626953125, |
|
"learning_rate": 3.295029637771441e-06, |
|
"loss": 34.7188, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 1.5367282559711581, |
|
"grad_norm": 2972.13916015625, |
|
"learning_rate": 3.2411076015025075e-06, |
|
"loss": 39.0094, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 1.5389815232086526, |
|
"grad_norm": 484.574951171875, |
|
"learning_rate": 3.187599823180071e-06, |
|
"loss": 36.4442, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 1.541234790446147, |
|
"grad_norm": 512.0244140625, |
|
"learning_rate": 3.1345073215151066e-06, |
|
"loss": 35.6959, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 1.5434880576836414, |
|
"grad_norm": 1594.818603515625, |
|
"learning_rate": 3.081831107312308e-06, |
|
"loss": 33.7189, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 1.5457413249211358, |
|
"grad_norm": 412.11993408203125, |
|
"learning_rate": 3.029572183450868e-06, |
|
"loss": 39.2777, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 1.54799459215863, |
|
"grad_norm": 261.7032165527344, |
|
"learning_rate": 2.9777315448653614e-06, |
|
"loss": 37.5702, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 1.5502478593961244, |
|
"grad_norm": 1331.4417724609375, |
|
"learning_rate": 2.9263101785268254e-06, |
|
"loss": 38.3099, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 1.5525011266336186, |
|
"grad_norm": 1108.656005859375, |
|
"learning_rate": 2.875309063423956e-06, |
|
"loss": 37.6969, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 1.554754393871113, |
|
"grad_norm": 629.5645751953125, |
|
"learning_rate": 2.8247291705444575e-06, |
|
"loss": 34.4015, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.5570076611086074, |
|
"grad_norm": 339.4286193847656, |
|
"learning_rate": 2.7745714628565927e-06, |
|
"loss": 34.8371, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 1.5592609283461019, |
|
"grad_norm": 497.2080383300781, |
|
"learning_rate": 2.7248368952908053e-06, |
|
"loss": 34.618, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 1.5615141955835963, |
|
"grad_norm": 447.3420104980469, |
|
"learning_rate": 2.6755264147215797e-06, |
|
"loss": 34.2607, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 1.5637674628210907, |
|
"grad_norm": 507.5426330566406, |
|
"learning_rate": 2.6266409599493753e-06, |
|
"loss": 40.3609, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 1.566020730058585, |
|
"grad_norm": 504.575927734375, |
|
"learning_rate": 2.578181461682794e-06, |
|
"loss": 35.5052, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 1.5682739972960793, |
|
"grad_norm": 614.0762329101562, |
|
"learning_rate": 2.5301488425208296e-06, |
|
"loss": 35.4192, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 1.5705272645335737, |
|
"grad_norm": 1427.1513671875, |
|
"learning_rate": 2.482544016935304e-06, |
|
"loss": 33.3038, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 1.572780531771068, |
|
"grad_norm": 1021.6607666015625, |
|
"learning_rate": 2.43536789125349e-06, |
|
"loss": 33.7017, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 1.5750337990085623, |
|
"grad_norm": 1753.479736328125, |
|
"learning_rate": 2.3886213636407973e-06, |
|
"loss": 36.5257, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 1.5772870662460567, |
|
"grad_norm": 1146.5517578125, |
|
"learning_rate": 2.3423053240837515e-06, |
|
"loss": 36.1648, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.5795403334835512, |
|
"grad_norm": 586.0597534179688, |
|
"learning_rate": 2.296420654372966e-06, |
|
"loss": 38.7891, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 1.5817936007210456, |
|
"grad_norm": 637.76904296875, |
|
"learning_rate": 2.2509682280864224e-06, |
|
"loss": 33.7852, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 1.58404686795854, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.205948910572786e-06, |
|
"loss": 34.2423, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 1.5863001351960344, |
|
"grad_norm": 583.9962158203125, |
|
"learning_rate": 2.1613635589349756e-06, |
|
"loss": 35.2592, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 1.5885534024335286, |
|
"grad_norm": 372.9781799316406, |
|
"learning_rate": 2.1172130220138226e-06, |
|
"loss": 36.0234, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 1.590806669671023, |
|
"grad_norm": 689.7586669921875, |
|
"learning_rate": 2.073498140371899e-06, |
|
"loss": 38.4326, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 1.5930599369085172, |
|
"grad_norm": 1217.2618408203125, |
|
"learning_rate": 2.030219746277545e-06, |
|
"loss": 35.6317, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 1.5953132041460116, |
|
"grad_norm": 1366.7015380859375, |
|
"learning_rate": 1.9873786636889906e-06, |
|
"loss": 32.8517, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 1.597566471383506, |
|
"grad_norm": 463.5328063964844, |
|
"learning_rate": 1.9449757082387083e-06, |
|
"loss": 36.9324, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 1.5998197386210005, |
|
"grad_norm": 873.59130859375, |
|
"learning_rate": 1.9030116872178316e-06, |
|
"loss": 35.7242, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.6020730058584949, |
|
"grad_norm": 533.456298828125, |
|
"learning_rate": 1.8614873995608406e-06, |
|
"loss": 34.3465, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 1.6043262730959893, |
|
"grad_norm": 1079.99853515625, |
|
"learning_rate": 1.8204036358303173e-06, |
|
"loss": 35.8141, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 1.6065795403334837, |
|
"grad_norm": 788.5497436523438, |
|
"learning_rate": 1.7797611782018942e-06, |
|
"loss": 34.5627, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 1.608832807570978, |
|
"grad_norm": 975.3521728515625, |
|
"learning_rate": 1.7395608004493886e-06, |
|
"loss": 34.3037, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 1.6110860748084723, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6998032679300391e-06, |
|
"loss": 33.9326, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 1.6133393420459665, |
|
"grad_norm": 1441.5693359375, |
|
"learning_rate": 1.6604893375699594e-06, |
|
"loss": 34.2679, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 1.615592609283461, |
|
"grad_norm": 1534.6036376953125, |
|
"learning_rate": 1.62161975784971e-06, |
|
"loss": 31.3119, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 1.6178458765209554, |
|
"grad_norm": 1183.213623046875, |
|
"learning_rate": 1.5831952687900608e-06, |
|
"loss": 37.1831, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 1.6200991437584498, |
|
"grad_norm": 1218.68505859375, |
|
"learning_rate": 1.5452166019378989e-06, |
|
"loss": 35.6401, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 1.6223524109959442, |
|
"grad_norm": 1537.08056640625, |
|
"learning_rate": 1.5076844803522922e-06, |
|
"loss": 36.5323, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.6246056782334386, |
|
"grad_norm": 1524.171875, |
|
"learning_rate": 1.4705996185907373e-06, |
|
"loss": 35.4555, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 1.626858945470933, |
|
"grad_norm": 805.3032836914062, |
|
"learning_rate": 1.4339627226955392e-06, |
|
"loss": 36.3888, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 1.6291122127084272, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3977744901803951e-06, |
|
"loss": 36.6125, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 1.6313654799459216, |
|
"grad_norm": 764.4529418945312, |
|
"learning_rate": 1.362035610017079e-06, |
|
"loss": 36.2714, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 1.6336187471834158, |
|
"grad_norm": 208.58338928222656, |
|
"learning_rate": 1.3267467626223606e-06, |
|
"loss": 34.2652, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 1.6358720144209102, |
|
"grad_norm": 1677.5860595703125, |
|
"learning_rate": 1.291908619845017e-06, |
|
"loss": 35.1815, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 1.6381252816584047, |
|
"grad_norm": 431.1494140625, |
|
"learning_rate": 1.2575218449530746e-06, |
|
"loss": 33.5625, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 1.640378548895899, |
|
"grad_norm": 833.9300537109375, |
|
"learning_rate": 1.2235870926211619e-06, |
|
"loss": 36.9845, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 1.6426318161333935, |
|
"grad_norm": 671.1135864257812, |
|
"learning_rate": 1.190105008918041e-06, |
|
"loss": 35.8609, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 1.644885083370888, |
|
"grad_norm": 565.2303466796875, |
|
"learning_rate": 1.1570762312943295e-06, |
|
"loss": 32.2597, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.6471383506083823, |
|
"grad_norm": 701.4552001953125, |
|
"learning_rate": 1.1245013885703343e-06, |
|
"loss": 36.2914, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 1.6493916178458765, |
|
"grad_norm": 748.3810424804688, |
|
"learning_rate": 1.0923811009241142e-06, |
|
"loss": 32.0475, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 1.651644885083371, |
|
"grad_norm": 581.8877563476562, |
|
"learning_rate": 1.0607159798796396e-06, |
|
"loss": 34.1596, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 1.6538981523208651, |
|
"grad_norm": 377.5722961425781, |
|
"learning_rate": 1.0295066282951738e-06, |
|
"loss": 38.7637, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 1.6561514195583595, |
|
"grad_norm": 905.6104736328125, |
|
"learning_rate": 9.98753640351785e-07, |
|
"loss": 39.1345, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 1.658404686795854, |
|
"grad_norm": 491.26531982421875, |
|
"learning_rate": 9.684576015420278e-07, |
|
"loss": 34.3473, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 1.6606579540333484, |
|
"grad_norm": 950.9983520507812, |
|
"learning_rate": 9.386190886588208e-07, |
|
"loss": 37.4909, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 1.6629112212708428, |
|
"grad_norm": 1486.637451171875, |
|
"learning_rate": 9.092386697844263e-07, |
|
"loss": 32.4935, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 1.6651644885083372, |
|
"grad_norm": 797.1807861328125, |
|
"learning_rate": 8.803169042796766e-07, |
|
"loss": 39.2807, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 1.6674177557458316, |
|
"grad_norm": 1035.8782958984375, |
|
"learning_rate": 8.51854342773295e-07, |
|
"loss": 35.8686, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.6696710229833258, |
|
"grad_norm": 850.1578979492188, |
|
"learning_rate": 8.23851527151423e-07, |
|
"loss": 33.6619, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 1.6719242902208202, |
|
"grad_norm": 570.3875732421875, |
|
"learning_rate": 7.963089905473092e-07, |
|
"loss": 34.901, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 1.6741775574583144, |
|
"grad_norm": 911.00341796875, |
|
"learning_rate": 7.692272573311426e-07, |
|
"loss": 37.3474, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 1.6764308246958088, |
|
"grad_norm": 1152.5958251953125, |
|
"learning_rate": 7.426068431000882e-07, |
|
"loss": 32.3363, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 1.6786840919333033, |
|
"grad_norm": 1237.0150146484375, |
|
"learning_rate": 7.164482546684642e-07, |
|
"loss": 35.8239, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 1.6809373591707977, |
|
"grad_norm": 508.03125, |
|
"learning_rate": 6.907519900580861e-07, |
|
"loss": 36.2687, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 1.683190626408292, |
|
"grad_norm": 325.2568359375, |
|
"learning_rate": 6.65518538488788e-07, |
|
"loss": 33.2241, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 1.6854438936457865, |
|
"grad_norm": 393.15087890625, |
|
"learning_rate": 6.407483803691216e-07, |
|
"loss": 36.265, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 1.687697160883281, |
|
"grad_norm": 647.8585205078125, |
|
"learning_rate": 6.164419872871835e-07, |
|
"loss": 32.4864, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 1.6899504281207751, |
|
"grad_norm": 1219.8280029296875, |
|
"learning_rate": 5.925998220016659e-07, |
|
"loss": 37.0409, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.6922036953582695, |
|
"grad_norm": 969.5092163085938, |
|
"learning_rate": 5.692223384330287e-07, |
|
"loss": 36.2839, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 1.6944569625957637, |
|
"grad_norm": 1250.261962890625, |
|
"learning_rate": 5.463099816548579e-07, |
|
"loss": 34.0554, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 1.6967102298332581, |
|
"grad_norm": 867.454833984375, |
|
"learning_rate": 5.238631878854039e-07, |
|
"loss": 37.9732, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 1.6989634970707526, |
|
"grad_norm": 418.4448547363281, |
|
"learning_rate": 5.018823844792603e-07, |
|
"loss": 37.3185, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 1.701216764308247, |
|
"grad_norm": 1724.4500732421875, |
|
"learning_rate": 4.803679899192392e-07, |
|
"loss": 35.9752, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 1.7034700315457414, |
|
"grad_norm": 621.5121459960938, |
|
"learning_rate": 4.5932041380840065e-07, |
|
"loss": 36.9349, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 1.7057232987832358, |
|
"grad_norm": 843.6145629882812, |
|
"learning_rate": 4.3874005686225796e-07, |
|
"loss": 36.2777, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 1.7079765660207302, |
|
"grad_norm": 800.4345703125, |
|
"learning_rate": 4.1862731090113736e-07, |
|
"loss": 37.3644, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 1.7102298332582244, |
|
"grad_norm": 1632.2015380859375, |
|
"learning_rate": 3.9898255884272817e-07, |
|
"loss": 34.1544, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 1.7124831004957188, |
|
"grad_norm": 1353.8416748046875, |
|
"learning_rate": 3.7980617469479953e-07, |
|
"loss": 37.3623, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 1.714736367733213, |
|
"grad_norm": 423.76629638671875, |
|
"learning_rate": 3.6109852354805627e-07, |
|
"loss": 36.1738, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 1.7169896349707074, |
|
"grad_norm": 927.58203125, |
|
"learning_rate": 3.428599615692141e-07, |
|
"loss": 37.3451, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 1.7192429022082019, |
|
"grad_norm": 693.3057250976562, |
|
"learning_rate": 3.250908359942045e-07, |
|
"loss": 34.0252, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 1.7214961694456963, |
|
"grad_norm": 597.1045532226562, |
|
"learning_rate": 3.077914851215585e-07, |
|
"loss": 36.9648, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 1.7237494366831907, |
|
"grad_norm": 1085.2398681640625, |
|
"learning_rate": 2.909622383059835e-07, |
|
"loss": 36.2999, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 1.726002703920685, |
|
"grad_norm": 890.6967163085938, |
|
"learning_rate": 2.746034159520794e-07, |
|
"loss": 32.9992, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 1.7282559711581793, |
|
"grad_norm": 722.5303344726562, |
|
"learning_rate": 2.5871532950824394e-07, |
|
"loss": 35.6259, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 1.7305092383956737, |
|
"grad_norm": 797.7754516601562, |
|
"learning_rate": 2.4329828146074095e-07, |
|
"loss": 38.2101, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 1.7327625056331681, |
|
"grad_norm": 1722.019775390625, |
|
"learning_rate": 2.283525653279439e-07, |
|
"loss": 34.0617, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 1.7350157728706623, |
|
"grad_norm": 345.729248046875, |
|
"learning_rate": 2.1387846565474045e-07, |
|
"loss": 34.5921, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 1.7372690401081567, |
|
"grad_norm": 366.74639892578125, |
|
"learning_rate": 1.998762580071256e-07, |
|
"loss": 33.4238, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 1.7395223073456512, |
|
"grad_norm": 878.8009643554688, |
|
"learning_rate": 1.8634620896695043e-07, |
|
"loss": 35.3607, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 1.7417755745831456, |
|
"grad_norm": 2363.9150390625, |
|
"learning_rate": 1.732885761268427e-07, |
|
"loss": 39.3755, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 1.74402884182064, |
|
"grad_norm": 594.54150390625, |
|
"learning_rate": 1.607036080853136e-07, |
|
"loss": 32.0209, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 1.7462821090581344, |
|
"grad_norm": 1179.718994140625, |
|
"learning_rate": 1.4859154444200884e-07, |
|
"loss": 35.8264, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 1.7485353762956286, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3695261579316777e-07, |
|
"loss": 34.839, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 1.750788643533123, |
|
"grad_norm": 662.6306762695312, |
|
"learning_rate": 1.257870437272074e-07, |
|
"loss": 38.8326, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 1.7530419107706174, |
|
"grad_norm": 451.3083190917969, |
|
"learning_rate": 1.1509504082052869e-07, |
|
"loss": 33.723, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 1.7552951780081116, |
|
"grad_norm": 844.54833984375, |
|
"learning_rate": 1.0487681063345856e-07, |
|
"loss": 36.3044, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 1.757548445245606, |
|
"grad_norm": 298.12890625, |
|
"learning_rate": 9.513254770636137e-08, |
|
"loss": 35.3172, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 1.7598017124831005, |
|
"grad_norm": 899.2721557617188, |
|
"learning_rate": 8.586243755596413e-08, |
|
"loss": 37.8019, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 1.7620549797205949, |
|
"grad_norm": 302.97686767578125, |
|
"learning_rate": 7.706665667180091e-08, |
|
"loss": 33.4112, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 1.7643082469580893, |
|
"grad_norm": 1330.9530029296875, |
|
"learning_rate": 6.874537251286006e-08, |
|
"loss": 36.4382, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 1.7665615141955837, |
|
"grad_norm": 576.2586059570312, |
|
"learning_rate": 6.089874350439506e-08, |
|
"loss": 40.1275, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 1.768814781433078, |
|
"grad_norm": 1114.9024658203125, |
|
"learning_rate": 5.352691903491303e-08, |
|
"loss": 36.7309, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 1.7710680486705723, |
|
"grad_norm": 118.37944030761719, |
|
"learning_rate": 4.6630039453327e-08, |
|
"loss": 38.1478, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 1.7733213159080667, |
|
"grad_norm": 589.9091796875, |
|
"learning_rate": 4.020823606628032e-08, |
|
"loss": 36.3156, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 1.775574583145561, |
|
"grad_norm": 628.8412475585938, |
|
"learning_rate": 3.426163113565417e-08, |
|
"loss": 37.6031, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 1.7778278503830554, |
|
"grad_norm": 350.41790771484375, |
|
"learning_rate": 2.879033787623331e-08, |
|
"loss": 34.7334, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 1.7800811176205498, |
|
"grad_norm": 441.7391357421875, |
|
"learning_rate": 2.3794460453555047e-08, |
|
"loss": 35.6559, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 1.7823343848580442, |
|
"grad_norm": 370.7310791015625, |
|
"learning_rate": 1.9274093981927478e-08, |
|
"loss": 33.2237, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 1.7845876520955386, |
|
"grad_norm": 804.0838623046875, |
|
"learning_rate": 1.522932452260595e-08, |
|
"loss": 33.7644, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 1.786840919333033, |
|
"grad_norm": 804.6845092773438, |
|
"learning_rate": 1.1660229082177676e-08, |
|
"loss": 33.2785, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 1.7890941865705272, |
|
"grad_norm": 561.6786499023438, |
|
"learning_rate": 8.566875611068504e-09, |
|
"loss": 39.918, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 1.7913474538080216, |
|
"grad_norm": 894.83544921875, |
|
"learning_rate": 5.94932300227169e-09, |
|
"loss": 35.263, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 1.793600721045516, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.807621090218261e-09, |
|
"loss": 36.6426, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 1.7958539882830102, |
|
"grad_norm": 489.0849914550781, |
|
"learning_rate": 2.1418106498249933e-09, |
|
"loss": 34.9072, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 1.7981072555205047, |
|
"grad_norm": 893.0217895507812, |
|
"learning_rate": 9.51923395717258e-10, |
|
"loss": 36.0869, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 1.800360522757999, |
|
"grad_norm": 5594.39306640625, |
|
"learning_rate": 2.379819816378248e-10, |
|
"loss": 33.2752, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 1.8026137899954935, |
|
"grad_norm": 783.1289672851562, |
|
"learning_rate": 0.0, |
|
"loss": 37.2431, |
|
"step": 80000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 80000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 40000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|