YL95's picture
Upload folder using huggingface_hub
46e1191
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 24.414529914529915,
"eval_steps": 500,
"global_step": 18018,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 2.5527192008879026e-06,
"loss": 0.8199,
"step": 46
},
{
"epoch": 0.39,
"learning_rate": 5.105438401775805e-06,
"loss": 0.8791,
"step": 92
},
{
"epoch": 0.59,
"learning_rate": 7.658157602663706e-06,
"loss": 0.7995,
"step": 138
},
{
"epoch": 0.79,
"learning_rate": 1.021087680355161e-05,
"loss": 0.7234,
"step": 184
},
{
"epoch": 0.98,
"learning_rate": 1.2763596004439513e-05,
"loss": 0.6077,
"step": 230
},
{
"epoch": 1.18,
"learning_rate": 1.5316315205327412e-05,
"loss": 0.5855,
"step": 276
},
{
"epoch": 1.38,
"learning_rate": 1.786903440621532e-05,
"loss": 0.5165,
"step": 322
},
{
"epoch": 1.57,
"learning_rate": 2.042175360710322e-05,
"loss": 0.5176,
"step": 368
},
{
"epoch": 1.77,
"learning_rate": 2.297447280799112e-05,
"loss": 0.4479,
"step": 414
},
{
"epoch": 1.97,
"learning_rate": 2.5527192008879026e-05,
"loss": 0.4451,
"step": 460
},
{
"epoch": 2.16,
"learning_rate": 2.807991120976693e-05,
"loss": 0.4133,
"step": 506
},
{
"epoch": 2.36,
"learning_rate": 3.0632630410654825e-05,
"loss": 0.3851,
"step": 552
},
{
"epoch": 2.56,
"learning_rate": 3.3185349611542734e-05,
"loss": 0.3973,
"step": 598
},
{
"epoch": 2.75,
"learning_rate": 3.573806881243064e-05,
"loss": 0.3546,
"step": 644
},
{
"epoch": 2.95,
"learning_rate": 3.829078801331853e-05,
"loss": 0.329,
"step": 690
},
{
"epoch": 3.15,
"learning_rate": 4.084350721420644e-05,
"loss": 0.3476,
"step": 736
},
{
"epoch": 1.06,
"learning_rate": 4.3396226415094345e-05,
"loss": 0.3039,
"step": 782
},
{
"epoch": 1.25,
"learning_rate": 4.594894561598224e-05,
"loss": 0.3156,
"step": 828
},
{
"epoch": 1.45,
"learning_rate": 4.850166481687014e-05,
"loss": 0.3129,
"step": 874
},
{
"epoch": 1.65,
"learning_rate": 5.105438401775805e-05,
"loss": 0.2968,
"step": 920
},
{
"epoch": 1.84,
"learning_rate": 5.360710321864595e-05,
"loss": 0.3168,
"step": 966
},
{
"epoch": 2.04,
"learning_rate": 5.615982241953386e-05,
"loss": 0.3347,
"step": 1012
},
{
"epoch": 2.24,
"learning_rate": 5.871254162042176e-05,
"loss": 0.3011,
"step": 1058
},
{
"epoch": 2.43,
"learning_rate": 6.126526082130965e-05,
"loss": 0.2856,
"step": 1104
},
{
"epoch": 2.63,
"learning_rate": 6.381798002219757e-05,
"loss": 0.2626,
"step": 1150
},
{
"epoch": 2.82,
"learning_rate": 6.637069922308547e-05,
"loss": 0.305,
"step": 1196
},
{
"epoch": 3.02,
"learning_rate": 6.892341842397336e-05,
"loss": 0.2895,
"step": 1242
},
{
"epoch": 3.22,
"learning_rate": 7.147613762486127e-05,
"loss": 0.2791,
"step": 1288
},
{
"epoch": 3.41,
"learning_rate": 7.402885682574918e-05,
"loss": 0.2542,
"step": 1334
},
{
"epoch": 3.61,
"learning_rate": 7.658157602663707e-05,
"loss": 0.2776,
"step": 1380
},
{
"epoch": 3.81,
"learning_rate": 7.913429522752498e-05,
"loss": 0.2573,
"step": 1426
},
{
"epoch": 4.0,
"learning_rate": 8.168701442841288e-05,
"loss": 0.2282,
"step": 1472
},
{
"epoch": 4.2,
"learning_rate": 8.423973362930077e-05,
"loss": 0.2645,
"step": 1518
},
{
"epoch": 2.11,
"learning_rate": 8.679245283018869e-05,
"loss": 0.2183,
"step": 1564
},
{
"epoch": 2.31,
"learning_rate": 8.934517203107659e-05,
"loss": 0.2505,
"step": 1610
},
{
"epoch": 2.5,
"learning_rate": 9.189789123196448e-05,
"loss": 0.2229,
"step": 1656
},
{
"epoch": 2.7,
"learning_rate": 9.44506104328524e-05,
"loss": 0.2404,
"step": 1702
},
{
"epoch": 2.9,
"learning_rate": 9.700332963374029e-05,
"loss": 0.2336,
"step": 1748
},
{
"epoch": 3.09,
"learning_rate": 9.955604883462819e-05,
"loss": 0.2178,
"step": 1794
},
{
"epoch": 3.29,
"learning_rate": 9.976566354218057e-05,
"loss": 0.1597,
"step": 1840
},
{
"epoch": 3.49,
"learning_rate": 9.948199309324125e-05,
"loss": 0.1837,
"step": 1886
},
{
"epoch": 3.68,
"learning_rate": 9.919832264430194e-05,
"loss": 0.2176,
"step": 1932
},
{
"epoch": 3.88,
"learning_rate": 9.891465219536261e-05,
"loss": 0.1992,
"step": 1978
},
{
"epoch": 4.08,
"learning_rate": 9.863098174642329e-05,
"loss": 0.1929,
"step": 2024
},
{
"epoch": 4.27,
"learning_rate": 9.834731129748396e-05,
"loss": 0.2017,
"step": 2070
},
{
"epoch": 4.47,
"learning_rate": 9.806364084854464e-05,
"loss": 0.1916,
"step": 2116
},
{
"epoch": 4.67,
"learning_rate": 9.777997039960533e-05,
"loss": 0.205,
"step": 2162
},
{
"epoch": 4.86,
"learning_rate": 9.749629995066602e-05,
"loss": 0.2272,
"step": 2208
},
{
"epoch": 5.06,
"learning_rate": 9.72126295017267e-05,
"loss": 0.2049,
"step": 2254
},
{
"epoch": 5.26,
"learning_rate": 9.692895905278737e-05,
"loss": 0.172,
"step": 2300
},
{
"epoch": 3.17,
"learning_rate": 9.664528860384806e-05,
"loss": 0.1737,
"step": 2346
},
{
"epoch": 3.36,
"learning_rate": 9.636161815490874e-05,
"loss": 0.1516,
"step": 2392
},
{
"epoch": 3.56,
"learning_rate": 9.607794770596941e-05,
"loss": 0.1686,
"step": 2438
},
{
"epoch": 3.76,
"learning_rate": 9.579427725703009e-05,
"loss": 0.1555,
"step": 2484
},
{
"epoch": 3.95,
"learning_rate": 9.551060680809078e-05,
"loss": 0.1519,
"step": 2530
},
{
"epoch": 4.15,
"learning_rate": 9.522693635915147e-05,
"loss": 0.1844,
"step": 2576
},
{
"epoch": 4.35,
"learning_rate": 9.494326591021215e-05,
"loss": 0.1493,
"step": 2622
},
{
"epoch": 4.54,
"learning_rate": 9.465959546127282e-05,
"loss": 0.155,
"step": 2668
},
{
"epoch": 4.74,
"learning_rate": 9.43759250123335e-05,
"loss": 0.153,
"step": 2714
},
{
"epoch": 4.94,
"learning_rate": 9.409225456339417e-05,
"loss": 0.157,
"step": 2760
},
{
"epoch": 5.13,
"learning_rate": 9.380858411445486e-05,
"loss": 0.1439,
"step": 2806
},
{
"epoch": 5.33,
"learning_rate": 9.352491366551554e-05,
"loss": 0.1429,
"step": 2852
},
{
"epoch": 5.53,
"learning_rate": 9.324124321657623e-05,
"loss": 0.1609,
"step": 2898
},
{
"epoch": 5.72,
"learning_rate": 9.29575727676369e-05,
"loss": 0.1664,
"step": 2944
},
{
"epoch": 5.92,
"learning_rate": 9.26739023186976e-05,
"loss": 0.1534,
"step": 2990
},
{
"epoch": 6.12,
"learning_rate": 9.239023186975827e-05,
"loss": 0.1518,
"step": 3036
},
{
"epoch": 4.03,
"learning_rate": 9.210656142081895e-05,
"loss": 0.1435,
"step": 3082
},
{
"epoch": 4.22,
"learning_rate": 9.182289097187962e-05,
"loss": 0.1335,
"step": 3128
},
{
"epoch": 4.42,
"learning_rate": 9.15392205229403e-05,
"loss": 0.1231,
"step": 3174
},
{
"epoch": 4.62,
"learning_rate": 9.125555007400099e-05,
"loss": 0.1237,
"step": 3220
},
{
"epoch": 4.81,
"learning_rate": 9.097187962506168e-05,
"loss": 0.1312,
"step": 3266
},
{
"epoch": 5.01,
"learning_rate": 9.068820917612236e-05,
"loss": 0.133,
"step": 3312
},
{
"epoch": 5.21,
"learning_rate": 9.040453872718303e-05,
"loss": 0.1324,
"step": 3358
},
{
"epoch": 5.4,
"learning_rate": 9.012086827824372e-05,
"loss": 0.1232,
"step": 3404
},
{
"epoch": 5.6,
"learning_rate": 8.98371978293044e-05,
"loss": 0.1104,
"step": 3450
},
{
"epoch": 5.79,
"learning_rate": 8.955352738036507e-05,
"loss": 0.1166,
"step": 3496
},
{
"epoch": 5.99,
"learning_rate": 8.926985693142575e-05,
"loss": 0.1114,
"step": 3542
},
{
"epoch": 6.19,
"learning_rate": 8.898618648248643e-05,
"loss": 0.1276,
"step": 3588
},
{
"epoch": 6.38,
"learning_rate": 8.870251603354712e-05,
"loss": 0.127,
"step": 3634
},
{
"epoch": 6.58,
"learning_rate": 8.84188455846078e-05,
"loss": 0.1255,
"step": 3680
},
{
"epoch": 6.78,
"learning_rate": 8.813517513566848e-05,
"loss": 0.1269,
"step": 3726
},
{
"epoch": 6.97,
"learning_rate": 8.785150468672916e-05,
"loss": 0.1339,
"step": 3772
},
{
"epoch": 7.17,
"learning_rate": 8.756783423778983e-05,
"loss": 0.1326,
"step": 3818
},
{
"epoch": 5.08,
"learning_rate": 8.728416378885052e-05,
"loss": 0.1203,
"step": 3864
},
{
"epoch": 5.28,
"learning_rate": 8.70004933399112e-05,
"loss": 0.1026,
"step": 3910
},
{
"epoch": 5.47,
"learning_rate": 8.671682289097188e-05,
"loss": 0.1252,
"step": 3956
},
{
"epoch": 5.67,
"learning_rate": 8.643315244203257e-05,
"loss": 0.1052,
"step": 4002
},
{
"epoch": 5.87,
"learning_rate": 8.614948199309326e-05,
"loss": 0.1188,
"step": 4048
},
{
"epoch": 6.06,
"learning_rate": 8.586581154415393e-05,
"loss": 0.1113,
"step": 4094
},
{
"epoch": 6.26,
"learning_rate": 8.558214109521461e-05,
"loss": 0.0934,
"step": 4140
},
{
"epoch": 6.46,
"learning_rate": 8.529847064627528e-05,
"loss": 0.1168,
"step": 4186
},
{
"epoch": 6.65,
"learning_rate": 8.501480019733596e-05,
"loss": 0.1113,
"step": 4232
},
{
"epoch": 6.85,
"learning_rate": 8.473112974839665e-05,
"loss": 0.0944,
"step": 4278
},
{
"epoch": 7.05,
"learning_rate": 8.444745929945733e-05,
"loss": 0.0956,
"step": 4324
},
{
"epoch": 7.24,
"learning_rate": 8.416378885051802e-05,
"loss": 0.092,
"step": 4370
},
{
"epoch": 7.44,
"learning_rate": 8.388011840157869e-05,
"loss": 0.0899,
"step": 4416
},
{
"epoch": 7.64,
"learning_rate": 8.359644795263937e-05,
"loss": 0.111,
"step": 4462
},
{
"epoch": 7.83,
"learning_rate": 8.331277750370006e-05,
"loss": 0.114,
"step": 4508
},
{
"epoch": 8.03,
"learning_rate": 8.302910705476073e-05,
"loss": 0.0906,
"step": 4554
},
{
"epoch": 8.23,
"learning_rate": 8.274543660582141e-05,
"loss": 0.1053,
"step": 4600
},
{
"epoch": 6.14,
"learning_rate": 8.246176615688209e-05,
"loss": 0.0983,
"step": 4646
},
{
"epoch": 6.33,
"learning_rate": 8.217809570794278e-05,
"loss": 0.0864,
"step": 4692
},
{
"epoch": 6.53,
"learning_rate": 8.189442525900347e-05,
"loss": 0.0857,
"step": 4738
},
{
"epoch": 6.73,
"learning_rate": 8.161075481006414e-05,
"loss": 0.0979,
"step": 4784
},
{
"epoch": 6.92,
"learning_rate": 8.132708436112482e-05,
"loss": 0.0919,
"step": 4830
},
{
"epoch": 7.12,
"learning_rate": 8.10434139121855e-05,
"loss": 0.0758,
"step": 4876
},
{
"epoch": 7.32,
"learning_rate": 8.075974346324618e-05,
"loss": 0.0948,
"step": 4922
},
{
"epoch": 7.51,
"learning_rate": 8.047607301430686e-05,
"loss": 0.0856,
"step": 4968
},
{
"epoch": 7.71,
"learning_rate": 8.019240256536754e-05,
"loss": 0.0677,
"step": 5014
},
{
"epoch": 7.91,
"learning_rate": 7.990873211642823e-05,
"loss": 0.0855,
"step": 5060
},
{
"epoch": 8.1,
"learning_rate": 7.96250616674889e-05,
"loss": 0.0896,
"step": 5106
},
{
"epoch": 8.3,
"learning_rate": 7.934139121854959e-05,
"loss": 0.0999,
"step": 5152
},
{
"epoch": 8.5,
"learning_rate": 7.905772076961027e-05,
"loss": 0.0866,
"step": 5198
},
{
"epoch": 8.69,
"learning_rate": 7.877405032067094e-05,
"loss": 0.0821,
"step": 5244
},
{
"epoch": 8.89,
"learning_rate": 7.849037987173162e-05,
"loss": 0.0826,
"step": 5290
},
{
"epoch": 9.09,
"learning_rate": 7.820670942279231e-05,
"loss": 0.0872,
"step": 5336
},
{
"epoch": 9.28,
"learning_rate": 7.792303897385299e-05,
"loss": 0.1113,
"step": 5382
},
{
"epoch": 7.19,
"learning_rate": 7.763936852491366e-05,
"loss": 0.0628,
"step": 5428
},
{
"epoch": 7.39,
"learning_rate": 7.735569807597435e-05,
"loss": 0.0747,
"step": 5474
},
{
"epoch": 7.59,
"learning_rate": 7.707202762703503e-05,
"loss": 0.0653,
"step": 5520
},
{
"epoch": 7.78,
"learning_rate": 7.678835717809572e-05,
"loss": 0.0852,
"step": 5566
},
{
"epoch": 7.98,
"learning_rate": 7.65046867291564e-05,
"loss": 0.0661,
"step": 5612
},
{
"epoch": 8.18,
"learning_rate": 7.622101628021707e-05,
"loss": 0.0683,
"step": 5658
},
{
"epoch": 8.37,
"learning_rate": 7.593734583127775e-05,
"loss": 0.0865,
"step": 5704
},
{
"epoch": 8.57,
"learning_rate": 7.565367538233844e-05,
"loss": 0.0722,
"step": 5750
},
{
"epoch": 8.76,
"learning_rate": 7.537000493339911e-05,
"loss": 0.0802,
"step": 5796
},
{
"epoch": 8.96,
"learning_rate": 7.50863344844598e-05,
"loss": 0.0693,
"step": 5842
},
{
"epoch": 9.16,
"learning_rate": 7.480266403552048e-05,
"loss": 0.0817,
"step": 5888
},
{
"epoch": 9.35,
"learning_rate": 7.451899358658115e-05,
"loss": 0.0844,
"step": 5934
},
{
"epoch": 9.55,
"learning_rate": 7.423532313764184e-05,
"loss": 0.0803,
"step": 5980
},
{
"epoch": 9.75,
"learning_rate": 7.395165268870252e-05,
"loss": 0.0815,
"step": 6026
},
{
"epoch": 9.94,
"learning_rate": 7.36679822397632e-05,
"loss": 0.0871,
"step": 6072
},
{
"epoch": 10.14,
"learning_rate": 7.338431179082387e-05,
"loss": 0.0763,
"step": 6118
},
{
"epoch": 8.05,
"learning_rate": 7.310064134188456e-05,
"loss": 0.065,
"step": 6164
},
{
"epoch": 8.25,
"learning_rate": 7.281697089294525e-05,
"loss": 0.0629,
"step": 6210
},
{
"epoch": 8.44,
"learning_rate": 7.253330044400593e-05,
"loss": 0.0712,
"step": 6256
},
{
"epoch": 8.64,
"learning_rate": 7.22496299950666e-05,
"loss": 0.0645,
"step": 6302
},
{
"epoch": 8.84,
"learning_rate": 7.196595954612728e-05,
"loss": 0.0725,
"step": 6348
},
{
"epoch": 9.03,
"learning_rate": 7.168228909718797e-05,
"loss": 0.0573,
"step": 6394
},
{
"epoch": 9.23,
"learning_rate": 7.139861864824865e-05,
"loss": 0.0621,
"step": 6440
},
{
"epoch": 9.43,
"learning_rate": 7.111494819930932e-05,
"loss": 0.06,
"step": 6486
},
{
"epoch": 9.62,
"learning_rate": 7.083127775037001e-05,
"loss": 0.0646,
"step": 6532
},
{
"epoch": 9.82,
"learning_rate": 7.054760730143069e-05,
"loss": 0.0575,
"step": 6578
},
{
"epoch": 10.02,
"learning_rate": 7.026393685249138e-05,
"loss": 0.0634,
"step": 6624
},
{
"epoch": 10.21,
"learning_rate": 6.998026640355205e-05,
"loss": 0.0568,
"step": 6670
},
{
"epoch": 10.41,
"learning_rate": 6.969659595461273e-05,
"loss": 0.0652,
"step": 6716
},
{
"epoch": 10.61,
"learning_rate": 6.941292550567341e-05,
"loss": 0.0795,
"step": 6762
},
{
"epoch": 10.8,
"learning_rate": 6.912925505673408e-05,
"loss": 0.064,
"step": 6808
},
{
"epoch": 11.0,
"learning_rate": 6.884558460779477e-05,
"loss": 0.0846,
"step": 6854
},
{
"epoch": 11.2,
"learning_rate": 6.856191415885546e-05,
"loss": 0.0587,
"step": 6900
},
{
"epoch": 9.11,
"learning_rate": 6.827824370991614e-05,
"loss": 0.0663,
"step": 6946
},
{
"epoch": 9.3,
"learning_rate": 6.799457326097681e-05,
"loss": 0.0534,
"step": 6992
},
{
"epoch": 9.5,
"learning_rate": 6.77109028120375e-05,
"loss": 0.0466,
"step": 7038
},
{
"epoch": 9.7,
"learning_rate": 6.742723236309818e-05,
"loss": 0.0529,
"step": 7084
},
{
"epoch": 9.89,
"learning_rate": 6.714356191415886e-05,
"loss": 0.0595,
"step": 7130
},
{
"epoch": 10.09,
"learning_rate": 6.685989146521953e-05,
"loss": 0.0547,
"step": 7176
},
{
"epoch": 10.29,
"learning_rate": 6.657622101628021e-05,
"loss": 0.0571,
"step": 7222
},
{
"epoch": 10.48,
"learning_rate": 6.62925505673409e-05,
"loss": 0.0601,
"step": 7268
},
{
"epoch": 10.68,
"learning_rate": 6.600888011840159e-05,
"loss": 0.0508,
"step": 7314
},
{
"epoch": 10.88,
"learning_rate": 6.572520966946226e-05,
"loss": 0.0572,
"step": 7360
},
{
"epoch": 11.07,
"learning_rate": 6.544153922052294e-05,
"loss": 0.0591,
"step": 7406
},
{
"epoch": 11.27,
"learning_rate": 6.515786877158362e-05,
"loss": 0.0708,
"step": 7452
},
{
"epoch": 11.47,
"learning_rate": 6.487419832264431e-05,
"loss": 0.0711,
"step": 7498
},
{
"epoch": 11.66,
"learning_rate": 6.459052787370498e-05,
"loss": 0.0514,
"step": 7544
},
{
"epoch": 11.86,
"learning_rate": 6.430685742476566e-05,
"loss": 0.0607,
"step": 7590
},
{
"epoch": 12.06,
"learning_rate": 6.402318697582635e-05,
"loss": 0.0491,
"step": 7636
},
{
"epoch": 12.25,
"learning_rate": 6.373951652688704e-05,
"loss": 0.0577,
"step": 7682
},
{
"epoch": 10.16,
"learning_rate": 6.345584607794771e-05,
"loss": 0.0416,
"step": 7728
},
{
"epoch": 10.36,
"learning_rate": 6.317217562900839e-05,
"loss": 0.0572,
"step": 7774
},
{
"epoch": 10.56,
"learning_rate": 6.288850518006907e-05,
"loss": 0.0554,
"step": 7820
},
{
"epoch": 10.75,
"learning_rate": 6.260483473112974e-05,
"loss": 0.0457,
"step": 7866
},
{
"epoch": 10.95,
"learning_rate": 6.232116428219043e-05,
"loss": 0.0528,
"step": 7912
},
{
"epoch": 11.15,
"learning_rate": 6.203749383325111e-05,
"loss": 0.0524,
"step": 7958
},
{
"epoch": 11.34,
"learning_rate": 6.17538233843118e-05,
"loss": 0.0631,
"step": 8004
},
{
"epoch": 11.54,
"learning_rate": 6.147015293537247e-05,
"loss": 0.042,
"step": 8050
},
{
"epoch": 11.74,
"learning_rate": 6.118648248643315e-05,
"loss": 0.0422,
"step": 8096
},
{
"epoch": 11.93,
"learning_rate": 6.090281203749384e-05,
"loss": 0.0511,
"step": 8142
},
{
"epoch": 12.13,
"learning_rate": 6.061914158855452e-05,
"loss": 0.0526,
"step": 8188
},
{
"epoch": 12.32,
"learning_rate": 6.033547113961519e-05,
"loss": 0.0454,
"step": 8234
},
{
"epoch": 12.52,
"learning_rate": 6.0051800690675876e-05,
"loss": 0.0462,
"step": 8280
},
{
"epoch": 12.72,
"learning_rate": 5.9768130241736566e-05,
"loss": 0.0499,
"step": 8326
},
{
"epoch": 12.91,
"learning_rate": 5.948445979279724e-05,
"loss": 0.0444,
"step": 8372
},
{
"epoch": 13.11,
"learning_rate": 5.920078934385792e-05,
"loss": 0.0462,
"step": 8418
},
{
"epoch": 11.02,
"learning_rate": 5.89171188949186e-05,
"loss": 0.0443,
"step": 8464
},
{
"epoch": 11.22,
"learning_rate": 5.863344844597928e-05,
"loss": 0.0394,
"step": 8510
},
{
"epoch": 11.41,
"learning_rate": 5.834977799703997e-05,
"loss": 0.0463,
"step": 8556
},
{
"epoch": 11.61,
"learning_rate": 5.806610754810064e-05,
"loss": 0.0396,
"step": 8602
},
{
"epoch": 11.81,
"learning_rate": 5.7782437099161326e-05,
"loss": 0.0409,
"step": 8648
},
{
"epoch": 12.0,
"learning_rate": 5.7498766650222e-05,
"loss": 0.0407,
"step": 8694
},
{
"epoch": 12.2,
"learning_rate": 5.721509620128269e-05,
"loss": 0.0442,
"step": 8740
},
{
"epoch": 12.4,
"learning_rate": 5.693142575234337e-05,
"loss": 0.0437,
"step": 8786
},
{
"epoch": 12.59,
"learning_rate": 5.664775530340405e-05,
"loss": 0.0368,
"step": 8832
},
{
"epoch": 12.79,
"learning_rate": 5.636408485446473e-05,
"loss": 0.0418,
"step": 8878
},
{
"epoch": 12.99,
"learning_rate": 5.60804144055254e-05,
"loss": 0.0412,
"step": 8924
},
{
"epoch": 13.18,
"learning_rate": 5.579674395658609e-05,
"loss": 0.0385,
"step": 8970
},
{
"epoch": 13.38,
"learning_rate": 5.5513073507646776e-05,
"loss": 0.0469,
"step": 9016
},
{
"epoch": 13.58,
"learning_rate": 5.522940305870745e-05,
"loss": 0.0402,
"step": 9062
},
{
"epoch": 13.77,
"learning_rate": 5.494573260976813e-05,
"loss": 0.0514,
"step": 9108
},
{
"epoch": 13.97,
"learning_rate": 5.466206216082881e-05,
"loss": 0.045,
"step": 9154
},
{
"epoch": 14.17,
"learning_rate": 5.43783917118895e-05,
"loss": 0.0523,
"step": 9200
},
{
"epoch": 12.08,
"learning_rate": 5.409472126295018e-05,
"loss": 0.0338,
"step": 9246
},
{
"epoch": 12.27,
"learning_rate": 5.381105081401085e-05,
"loss": 0.0358,
"step": 9292
},
{
"epoch": 12.47,
"learning_rate": 5.3527380365071536e-05,
"loss": 0.0387,
"step": 9338
},
{
"epoch": 12.67,
"learning_rate": 5.3243709916132226e-05,
"loss": 0.0289,
"step": 9384
},
{
"epoch": 12.86,
"learning_rate": 5.29600394671929e-05,
"loss": 0.0396,
"step": 9430
},
{
"epoch": 13.06,
"learning_rate": 5.267636901825358e-05,
"loss": 0.0426,
"step": 9476
},
{
"epoch": 13.26,
"learning_rate": 5.239269856931426e-05,
"loss": 0.0345,
"step": 9522
},
{
"epoch": 13.45,
"learning_rate": 5.210902812037494e-05,
"loss": 0.0394,
"step": 9568
},
{
"epoch": 13.65,
"learning_rate": 5.182535767143563e-05,
"loss": 0.0349,
"step": 9614
},
{
"epoch": 13.85,
"learning_rate": 5.15416872224963e-05,
"loss": 0.0322,
"step": 9660
},
{
"epoch": 14.04,
"learning_rate": 5.1258016773556986e-05,
"loss": 0.0372,
"step": 9706
},
{
"epoch": 14.24,
"learning_rate": 5.097434632461766e-05,
"loss": 0.0334,
"step": 9752
},
{
"epoch": 14.44,
"learning_rate": 5.069067587567834e-05,
"loss": 0.0375,
"step": 9798
},
{
"epoch": 14.63,
"learning_rate": 5.040700542673903e-05,
"loss": 0.0396,
"step": 9844
},
{
"epoch": 14.83,
"learning_rate": 5.0123334977799704e-05,
"loss": 0.0356,
"step": 9890
},
{
"epoch": 15.03,
"learning_rate": 4.983966452886039e-05,
"loss": 0.0458,
"step": 9936
},
{
"epoch": 15.22,
"learning_rate": 4.955599407992107e-05,
"loss": 0.036,
"step": 9982
},
{
"epoch": 13.13,
"learning_rate": 4.9272323630981746e-05,
"loss": 0.028,
"step": 10028
},
{
"epoch": 13.33,
"learning_rate": 4.898865318204243e-05,
"loss": 0.0229,
"step": 10074
},
{
"epoch": 13.53,
"learning_rate": 4.870498273310311e-05,
"loss": 0.0353,
"step": 10120
},
{
"epoch": 13.72,
"learning_rate": 4.842131228416379e-05,
"loss": 0.0278,
"step": 10166
},
{
"epoch": 13.92,
"learning_rate": 4.813764183522447e-05,
"loss": 0.0296,
"step": 10212
},
{
"epoch": 14.12,
"learning_rate": 4.7853971386285154e-05,
"loss": 0.0284,
"step": 10258
},
{
"epoch": 14.31,
"learning_rate": 4.757030093734584e-05,
"loss": 0.0299,
"step": 10304
},
{
"epoch": 14.51,
"learning_rate": 4.728663048840651e-05,
"loss": 0.0309,
"step": 10350
},
{
"epoch": 14.71,
"learning_rate": 4.7002960039467196e-05,
"loss": 0.0283,
"step": 10396
},
{
"epoch": 14.9,
"learning_rate": 4.671928959052788e-05,
"loss": 0.0349,
"step": 10442
},
{
"epoch": 15.1,
"learning_rate": 4.6435619141588555e-05,
"loss": 0.0343,
"step": 10488
},
{
"epoch": 15.29,
"learning_rate": 4.615194869264924e-05,
"loss": 0.0347,
"step": 10534
},
{
"epoch": 15.49,
"learning_rate": 4.5868278243709914e-05,
"loss": 0.035,
"step": 10580
},
{
"epoch": 15.69,
"learning_rate": 4.55846077947706e-05,
"loss": 0.0284,
"step": 10626
},
{
"epoch": 15.88,
"learning_rate": 4.530093734583128e-05,
"loss": 0.0408,
"step": 10672
},
{
"epoch": 16.08,
"learning_rate": 4.501726689689196e-05,
"loss": 0.0386,
"step": 10718
},
{
"epoch": 16.28,
"learning_rate": 4.473359644795264e-05,
"loss": 0.0288,
"step": 10764
},
{
"epoch": 14.19,
"learning_rate": 4.444992599901332e-05,
"loss": 0.0274,
"step": 10810
},
{
"epoch": 14.38,
"learning_rate": 4.4166255550074005e-05,
"loss": 0.0268,
"step": 10856
},
{
"epoch": 14.58,
"learning_rate": 4.388258510113468e-05,
"loss": 0.0206,
"step": 10902
},
{
"epoch": 14.78,
"learning_rate": 4.3598914652195364e-05,
"loss": 0.0281,
"step": 10948
},
{
"epoch": 14.97,
"learning_rate": 4.331524420325605e-05,
"loss": 0.0242,
"step": 10994
},
{
"epoch": 15.17,
"learning_rate": 4.303157375431673e-05,
"loss": 0.0246,
"step": 11040
},
{
"epoch": 15.37,
"learning_rate": 4.2747903305377406e-05,
"loss": 0.0275,
"step": 11086
},
{
"epoch": 15.56,
"learning_rate": 4.246423285643808e-05,
"loss": 0.0259,
"step": 11132
},
{
"epoch": 15.76,
"learning_rate": 4.218056240749877e-05,
"loss": 0.0251,
"step": 11178
},
{
"epoch": 15.96,
"learning_rate": 4.189689195855945e-05,
"loss": 0.0229,
"step": 11224
},
{
"epoch": 16.15,
"learning_rate": 4.161322150962013e-05,
"loss": 0.0257,
"step": 11270
},
{
"epoch": 16.35,
"learning_rate": 4.132955106068081e-05,
"loss": 0.0269,
"step": 11316
},
{
"epoch": 16.55,
"learning_rate": 4.10458806117415e-05,
"loss": 0.0344,
"step": 11362
},
{
"epoch": 16.74,
"learning_rate": 4.0762210162802173e-05,
"loss": 0.0288,
"step": 11408
},
{
"epoch": 16.94,
"learning_rate": 4.047853971386285e-05,
"loss": 0.0287,
"step": 11454
},
{
"epoch": 17.14,
"learning_rate": 4.019486926492353e-05,
"loss": 0.0315,
"step": 11500
},
{
"epoch": 15.05,
"learning_rate": 3.9911198815984215e-05,
"loss": 0.0317,
"step": 11546
},
{
"epoch": 15.24,
"learning_rate": 3.96275283670449e-05,
"loss": 0.02,
"step": 11592
},
{
"epoch": 15.44,
"learning_rate": 3.9343857918105575e-05,
"loss": 0.0215,
"step": 11638
},
{
"epoch": 15.64,
"learning_rate": 3.906018746916626e-05,
"loss": 0.0276,
"step": 11684
},
{
"epoch": 15.83,
"learning_rate": 3.877651702022694e-05,
"loss": 0.0207,
"step": 11730
},
{
"epoch": 16.03,
"learning_rate": 3.8492846571287617e-05,
"loss": 0.0217,
"step": 11776
},
{
"epoch": 16.23,
"learning_rate": 3.82091761223483e-05,
"loss": 0.0254,
"step": 11822
},
{
"epoch": 16.42,
"learning_rate": 3.7925505673408976e-05,
"loss": 0.0245,
"step": 11868
},
{
"epoch": 16.62,
"learning_rate": 3.7641835224469665e-05,
"loss": 0.0275,
"step": 11914
},
{
"epoch": 16.82,
"learning_rate": 3.735816477553034e-05,
"loss": 0.0261,
"step": 11960
},
{
"epoch": 17.01,
"learning_rate": 3.7074494326591025e-05,
"loss": 0.0186,
"step": 12006
},
{
"epoch": 17.21,
"learning_rate": 3.67908238776517e-05,
"loss": 0.0186,
"step": 12052
},
{
"epoch": 17.41,
"learning_rate": 3.650715342871239e-05,
"loss": 0.0221,
"step": 12098
},
{
"epoch": 17.6,
"learning_rate": 3.6223482979773067e-05,
"loss": 0.0199,
"step": 12144
},
{
"epoch": 17.8,
"learning_rate": 3.593981253083374e-05,
"loss": 0.0241,
"step": 12190
},
{
"epoch": 18.0,
"learning_rate": 3.5656142081894426e-05,
"loss": 0.0224,
"step": 12236
},
{
"epoch": 18.19,
"learning_rate": 3.537247163295511e-05,
"loss": 0.0236,
"step": 12282
},
{
"epoch": 16.1,
"learning_rate": 3.508880118401579e-05,
"loss": 0.0206,
"step": 12328
},
{
"epoch": 16.3,
"learning_rate": 3.480513073507647e-05,
"loss": 0.0167,
"step": 12374
},
{
"epoch": 16.5,
"learning_rate": 3.452146028613715e-05,
"loss": 0.0209,
"step": 12420
},
{
"epoch": 16.69,
"learning_rate": 3.4237789837197834e-05,
"loss": 0.0188,
"step": 12466
},
{
"epoch": 16.89,
"learning_rate": 3.395411938825851e-05,
"loss": 0.0189,
"step": 12512
},
{
"epoch": 17.09,
"learning_rate": 3.367044893931919e-05,
"loss": 0.0229,
"step": 12558
},
{
"epoch": 17.28,
"learning_rate": 3.338677849037987e-05,
"loss": 0.0186,
"step": 12604
},
{
"epoch": 17.48,
"learning_rate": 3.310310804144056e-05,
"loss": 0.0218,
"step": 12650
},
{
"epoch": 17.68,
"learning_rate": 3.2819437592501235e-05,
"loss": 0.0165,
"step": 12696
},
{
"epoch": 17.87,
"learning_rate": 3.253576714356192e-05,
"loss": 0.0175,
"step": 12742
},
{
"epoch": 18.07,
"learning_rate": 3.2252096694622594e-05,
"loss": 0.0159,
"step": 12788
},
{
"epoch": 18.26,
"learning_rate": 3.196842624568328e-05,
"loss": 0.0174,
"step": 12834
},
{
"epoch": 18.46,
"learning_rate": 3.168475579674396e-05,
"loss": 0.0195,
"step": 12880
},
{
"epoch": 18.66,
"learning_rate": 3.1401085347804636e-05,
"loss": 0.0203,
"step": 12926
},
{
"epoch": 18.85,
"learning_rate": 3.111741489886532e-05,
"loss": 0.019,
"step": 12972
},
{
"epoch": 19.05,
"learning_rate": 3.0833744449926e-05,
"loss": 0.0197,
"step": 13018
},
{
"epoch": 19.25,
"learning_rate": 3.0550074000986685e-05,
"loss": 0.0221,
"step": 13064
},
{
"epoch": 17.16,
"learning_rate": 3.026640355204736e-05,
"loss": 0.0191,
"step": 13110
},
{
"epoch": 17.35,
"learning_rate": 2.998273310310804e-05,
"loss": 0.0138,
"step": 13156
},
{
"epoch": 17.55,
"learning_rate": 2.9699062654168723e-05,
"loss": 0.0149,
"step": 13202
},
{
"epoch": 17.75,
"learning_rate": 2.9415392205229403e-05,
"loss": 0.018,
"step": 13248
},
{
"epoch": 17.94,
"learning_rate": 2.9131721756290086e-05,
"loss": 0.0162,
"step": 13294
},
{
"epoch": 18.14,
"learning_rate": 2.8848051307350765e-05,
"loss": 0.0153,
"step": 13340
},
{
"epoch": 18.34,
"learning_rate": 2.8564380858411448e-05,
"loss": 0.0141,
"step": 13386
},
{
"epoch": 18.53,
"learning_rate": 2.8280710409472128e-05,
"loss": 0.013,
"step": 13432
},
{
"epoch": 18.73,
"learning_rate": 2.7997039960532807e-05,
"loss": 0.0184,
"step": 13478
},
{
"epoch": 18.93,
"learning_rate": 2.771336951159349e-05,
"loss": 0.0149,
"step": 13524
},
{
"epoch": 19.12,
"learning_rate": 2.742969906265417e-05,
"loss": 0.0159,
"step": 13570
},
{
"epoch": 19.32,
"learning_rate": 2.7146028613714853e-05,
"loss": 0.0171,
"step": 13616
},
{
"epoch": 19.52,
"learning_rate": 2.6862358164775532e-05,
"loss": 0.0174,
"step": 13662
},
{
"epoch": 19.71,
"learning_rate": 2.6578687715836215e-05,
"loss": 0.0159,
"step": 13708
},
{
"epoch": 19.91,
"learning_rate": 2.629501726689689e-05,
"loss": 0.0202,
"step": 13754
},
{
"epoch": 20.11,
"learning_rate": 2.601134681795757e-05,
"loss": 0.0182,
"step": 13800
},
{
"epoch": 18.02,
"learning_rate": 2.5727676369018254e-05,
"loss": 0.0138,
"step": 13846
},
{
"epoch": 18.21,
"learning_rate": 2.5444005920078933e-05,
"loss": 0.0136,
"step": 13892
},
{
"epoch": 18.41,
"learning_rate": 2.5160335471139616e-05,
"loss": 0.0135,
"step": 13938
},
{
"epoch": 18.61,
"learning_rate": 2.4876665022200296e-05,
"loss": 0.0125,
"step": 13984
},
{
"epoch": 18.8,
"learning_rate": 2.459299457326098e-05,
"loss": 0.014,
"step": 14030
},
{
"epoch": 19.0,
"learning_rate": 2.430932412432166e-05,
"loss": 0.0137,
"step": 14076
},
{
"epoch": 19.2,
"learning_rate": 2.402565367538234e-05,
"loss": 0.0162,
"step": 14122
},
{
"epoch": 19.39,
"learning_rate": 2.374198322644302e-05,
"loss": 0.0134,
"step": 14168
},
{
"epoch": 19.59,
"learning_rate": 2.34583127775037e-05,
"loss": 0.0113,
"step": 14214
},
{
"epoch": 19.79,
"learning_rate": 2.3174642328564383e-05,
"loss": 0.0134,
"step": 14260
},
{
"epoch": 19.98,
"learning_rate": 2.2890971879625063e-05,
"loss": 0.0135,
"step": 14306
},
{
"epoch": 20.18,
"learning_rate": 2.2607301430685742e-05,
"loss": 0.0132,
"step": 14352
},
{
"epoch": 20.38,
"learning_rate": 2.2323630981746425e-05,
"loss": 0.015,
"step": 14398
},
{
"epoch": 20.57,
"learning_rate": 2.2039960532807105e-05,
"loss": 0.0117,
"step": 14444
},
{
"epoch": 20.77,
"learning_rate": 2.1756290083867788e-05,
"loss": 0.0148,
"step": 14490
},
{
"epoch": 20.97,
"learning_rate": 2.1472619634928467e-05,
"loss": 0.0168,
"step": 14536
},
{
"epoch": 21.16,
"learning_rate": 2.118894918598915e-05,
"loss": 0.0139,
"step": 14582
},
{
"epoch": 19.07,
"learning_rate": 2.0905278737049827e-05,
"loss": 0.0112,
"step": 14628
},
{
"epoch": 19.27,
"learning_rate": 2.062160828811051e-05,
"loss": 0.0094,
"step": 14674
},
{
"epoch": 19.47,
"learning_rate": 2.033793783917119e-05,
"loss": 0.0092,
"step": 14720
},
{
"epoch": 19.66,
"learning_rate": 2.0054267390231872e-05,
"loss": 0.0111,
"step": 14766
},
{
"epoch": 19.86,
"learning_rate": 1.977059694129255e-05,
"loss": 0.0112,
"step": 14812
},
{
"epoch": 20.06,
"learning_rate": 1.9486926492353234e-05,
"loss": 0.0127,
"step": 14858
},
{
"epoch": 20.25,
"learning_rate": 1.9203256043413914e-05,
"loss": 0.0105,
"step": 14904
},
{
"epoch": 20.45,
"learning_rate": 1.8919585594474594e-05,
"loss": 0.0142,
"step": 14950
},
{
"epoch": 20.65,
"learning_rate": 1.8635915145535273e-05,
"loss": 0.0106,
"step": 14996
},
{
"epoch": 20.84,
"learning_rate": 1.8352244696595956e-05,
"loss": 0.0115,
"step": 15042
},
{
"epoch": 21.04,
"learning_rate": 1.8068574247656636e-05,
"loss": 0.0114,
"step": 15088
},
{
"epoch": 21.24,
"learning_rate": 1.778490379871732e-05,
"loss": 0.012,
"step": 15134
},
{
"epoch": 21.43,
"learning_rate": 1.7501233349777998e-05,
"loss": 0.0111,
"step": 15180
},
{
"epoch": 21.63,
"learning_rate": 1.721756290083868e-05,
"loss": 0.0127,
"step": 15226
},
{
"epoch": 21.82,
"learning_rate": 1.693389245189936e-05,
"loss": 0.0108,
"step": 15272
},
{
"epoch": 22.02,
"learning_rate": 1.665022200296004e-05,
"loss": 0.0123,
"step": 15318
},
{
"epoch": 22.22,
"learning_rate": 1.636655155402072e-05,
"loss": 0.0116,
"step": 15364
},
{
"epoch": 20.13,
"learning_rate": 1.6082881105081403e-05,
"loss": 0.0115,
"step": 15410
},
{
"epoch": 20.32,
"learning_rate": 1.5799210656142082e-05,
"loss": 0.0083,
"step": 15456
},
{
"epoch": 20.52,
"learning_rate": 1.5515540207202765e-05,
"loss": 0.0094,
"step": 15502
},
{
"epoch": 20.72,
"learning_rate": 1.5231869758263445e-05,
"loss": 0.0087,
"step": 15548
},
{
"epoch": 20.91,
"learning_rate": 1.4948199309324126e-05,
"loss": 0.0119,
"step": 15594
},
{
"epoch": 21.11,
"learning_rate": 1.4664528860384805e-05,
"loss": 0.009,
"step": 15640
},
{
"epoch": 21.31,
"learning_rate": 1.4380858411445487e-05,
"loss": 0.0113,
"step": 15686
},
{
"epoch": 21.5,
"learning_rate": 1.4097187962506166e-05,
"loss": 0.0096,
"step": 15732
},
{
"epoch": 21.7,
"learning_rate": 1.3813517513566847e-05,
"loss": 0.0103,
"step": 15778
},
{
"epoch": 21.9,
"learning_rate": 1.3529847064627529e-05,
"loss": 0.0089,
"step": 15824
},
{
"epoch": 22.09,
"learning_rate": 1.324617661568821e-05,
"loss": 0.0082,
"step": 15870
},
{
"epoch": 22.29,
"learning_rate": 1.2962506166748891e-05,
"loss": 0.01,
"step": 15916
},
{
"epoch": 22.49,
"learning_rate": 1.267883571780957e-05,
"loss": 0.0084,
"step": 15962
},
{
"epoch": 22.68,
"learning_rate": 1.2395165268870252e-05,
"loss": 0.0093,
"step": 16008
},
{
"epoch": 22.88,
"learning_rate": 1.2111494819930933e-05,
"loss": 0.0104,
"step": 16054
},
{
"epoch": 23.08,
"learning_rate": 1.1827824370991614e-05,
"loss": 0.0117,
"step": 16100
},
{
"epoch": 23.27,
"learning_rate": 1.1544153922052296e-05,
"loss": 0.0095,
"step": 16146
},
{
"epoch": 21.18,
"learning_rate": 1.1260483473112975e-05,
"loss": 0.0085,
"step": 16192
},
{
"epoch": 21.38,
"learning_rate": 1.0976813024173656e-05,
"loss": 0.0083,
"step": 16238
},
{
"epoch": 21.58,
"learning_rate": 1.0693142575234338e-05,
"loss": 0.0081,
"step": 16284
},
{
"epoch": 21.77,
"learning_rate": 1.0409472126295019e-05,
"loss": 0.0068,
"step": 16330
},
{
"epoch": 21.97,
"learning_rate": 1.0125801677355699e-05,
"loss": 0.0098,
"step": 16376
},
{
"epoch": 22.17,
"learning_rate": 9.84213122841638e-06,
"loss": 0.0087,
"step": 16422
},
{
"epoch": 22.36,
"learning_rate": 9.558460779477061e-06,
"loss": 0.007,
"step": 16468
},
{
"epoch": 22.56,
"learning_rate": 9.274790330537742e-06,
"loss": 0.0085,
"step": 16514
},
{
"epoch": 22.76,
"learning_rate": 8.991119881598422e-06,
"loss": 0.0094,
"step": 16560
},
{
"epoch": 22.95,
"learning_rate": 8.707449432659103e-06,
"loss": 0.0077,
"step": 16606
},
{
"epoch": 23.15,
"learning_rate": 8.423778983719784e-06,
"loss": 0.0077,
"step": 16652
},
{
"epoch": 23.35,
"learning_rate": 8.140108534780466e-06,
"loss": 0.008,
"step": 16698
},
{
"epoch": 23.54,
"learning_rate": 7.856438085841145e-06,
"loss": 0.0088,
"step": 16744
},
{
"epoch": 23.74,
"learning_rate": 7.5727676369018255e-06,
"loss": 0.0078,
"step": 16790
},
{
"epoch": 23.94,
"learning_rate": 7.289097187962507e-06,
"loss": 0.0095,
"step": 16836
},
{
"epoch": 24.13,
"learning_rate": 7.005426739023187e-06,
"loss": 0.0079,
"step": 16882
},
{
"epoch": 22.04,
"learning_rate": 6.721756290083868e-06,
"loss": 0.0068,
"step": 16928
},
{
"epoch": 22.24,
"learning_rate": 6.438085841144549e-06,
"loss": 0.0074,
"step": 16974
},
{
"epoch": 22.44,
"learning_rate": 6.15441539220523e-06,
"loss": 0.0095,
"step": 17020
},
{
"epoch": 22.63,
"learning_rate": 5.87074494326591e-06,
"loss": 0.007,
"step": 17066
},
{
"epoch": 22.83,
"learning_rate": 5.587074494326592e-06,
"loss": 0.0059,
"step": 17112
},
{
"epoch": 23.03,
"learning_rate": 5.303404045387272e-06,
"loss": 0.0075,
"step": 17158
},
{
"epoch": 23.22,
"learning_rate": 5.019733596447953e-06,
"loss": 0.0069,
"step": 17204
},
{
"epoch": 23.42,
"learning_rate": 4.736063147508634e-06,
"loss": 0.0068,
"step": 17250
},
{
"epoch": 23.62,
"learning_rate": 4.452392698569315e-06,
"loss": 0.0079,
"step": 17296
},
{
"epoch": 23.81,
"learning_rate": 4.168722249629995e-06,
"loss": 0.0068,
"step": 17342
},
{
"epoch": 24.01,
"learning_rate": 3.8850518006906765e-06,
"loss": 0.0074,
"step": 17388
},
{
"epoch": 24.21,
"learning_rate": 3.6013813517513565e-06,
"loss": 0.0068,
"step": 17434
},
{
"epoch": 24.4,
"learning_rate": 3.3177109028120377e-06,
"loss": 0.0076,
"step": 17480
},
{
"epoch": 24.6,
"learning_rate": 3.0340404538727186e-06,
"loss": 0.0063,
"step": 17526
},
{
"epoch": 24.79,
"learning_rate": 2.7503700049333994e-06,
"loss": 0.0066,
"step": 17572
},
{
"epoch": 24.99,
"learning_rate": 2.46669955599408e-06,
"loss": 0.0077,
"step": 17618
},
{
"epoch": 25.19,
"learning_rate": 2.183029107054761e-06,
"loss": 0.0077,
"step": 17664
},
{
"epoch": 23.1,
"learning_rate": 1.8993586581154416e-06,
"loss": 0.0065,
"step": 17710
},
{
"epoch": 23.29,
"learning_rate": 1.6156882091761224e-06,
"loss": 0.007,
"step": 17756
},
{
"epoch": 23.49,
"learning_rate": 1.3320177602368033e-06,
"loss": 0.0064,
"step": 17802
},
{
"epoch": 23.69,
"learning_rate": 1.048347311297484e-06,
"loss": 0.0066,
"step": 17848
},
{
"epoch": 23.88,
"learning_rate": 7.646768623581648e-07,
"loss": 0.0057,
"step": 17894
},
{
"epoch": 24.08,
"learning_rate": 4.810064134188456e-07,
"loss": 0.0063,
"step": 17940
},
{
"epoch": 24.28,
"learning_rate": 1.973359644795264e-07,
"loss": 0.0057,
"step": 17986
}
],
"logging_steps": 46,
"max_steps": 18018,
"num_train_epochs": 77,
"save_steps": 500,
"total_flos": 7.874587036609413e+17,
"trial_name": null,
"trial_params": null
}