{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 24.414529914529915,
  "eval_steps": 500,
  "global_step": 18018,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.5527192008879026e-06, |
|
"loss": 0.8199, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.105438401775805e-06, |
|
"loss": 0.8791, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.658157602663706e-06, |
|
"loss": 0.7995, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.021087680355161e-05, |
|
"loss": 0.7234, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2763596004439513e-05, |
|
"loss": 0.6077, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.5316315205327412e-05, |
|
"loss": 0.5855, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.786903440621532e-05, |
|
"loss": 0.5165, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.042175360710322e-05, |
|
"loss": 0.5176, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.297447280799112e-05, |
|
"loss": 0.4479, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.5527192008879026e-05, |
|
"loss": 0.4451, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.807991120976693e-05, |
|
"loss": 0.4133, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.0632630410654825e-05, |
|
"loss": 0.3851, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.3185349611542734e-05, |
|
"loss": 0.3973, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 3.573806881243064e-05, |
|
"loss": 0.3546, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.829078801331853e-05, |
|
"loss": 0.329, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 4.084350721420644e-05, |
|
"loss": 0.3476, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.3396226415094345e-05, |
|
"loss": 0.3039, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.594894561598224e-05, |
|
"loss": 0.3156, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.850166481687014e-05, |
|
"loss": 0.3129, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5.105438401775805e-05, |
|
"loss": 0.2968, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5.360710321864595e-05, |
|
"loss": 0.3168, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5.615982241953386e-05, |
|
"loss": 0.3347, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.871254162042176e-05, |
|
"loss": 0.3011, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 6.126526082130965e-05, |
|
"loss": 0.2856, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.381798002219757e-05, |
|
"loss": 0.2626, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 6.637069922308547e-05, |
|
"loss": 0.305, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 6.892341842397336e-05, |
|
"loss": 0.2895, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 7.147613762486127e-05, |
|
"loss": 0.2791, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 7.402885682574918e-05, |
|
"loss": 0.2542, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 7.658157602663707e-05, |
|
"loss": 0.2776, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 7.913429522752498e-05, |
|
"loss": 0.2573, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.168701442841288e-05, |
|
"loss": 0.2282, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 8.423973362930077e-05, |
|
"loss": 0.2645, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.679245283018869e-05, |
|
"loss": 0.2183, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 8.934517203107659e-05, |
|
"loss": 0.2505, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.189789123196448e-05, |
|
"loss": 0.2229, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.44506104328524e-05, |
|
"loss": 0.2404, |
|
"step": 1702 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.700332963374029e-05, |
|
"loss": 0.2336, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 9.955604883462819e-05, |
|
"loss": 0.2178, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 9.976566354218057e-05, |
|
"loss": 0.1597, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 9.948199309324125e-05, |
|
"loss": 0.1837, |
|
"step": 1886 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 9.919832264430194e-05, |
|
"loss": 0.2176, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 9.891465219536261e-05, |
|
"loss": 0.1992, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 9.863098174642329e-05, |
|
"loss": 0.1929, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 9.834731129748396e-05, |
|
"loss": 0.2017, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 9.806364084854464e-05, |
|
"loss": 0.1916, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 9.777997039960533e-05, |
|
"loss": 0.205, |
|
"step": 2162 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 9.749629995066602e-05, |
|
"loss": 0.2272, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 9.72126295017267e-05, |
|
"loss": 0.2049, |
|
"step": 2254 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 9.692895905278737e-05, |
|
"loss": 0.172, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 9.664528860384806e-05, |
|
"loss": 0.1737, |
|
"step": 2346 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 9.636161815490874e-05, |
|
"loss": 0.1516, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 9.607794770596941e-05, |
|
"loss": 0.1686, |
|
"step": 2438 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 9.579427725703009e-05, |
|
"loss": 0.1555, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 9.551060680809078e-05, |
|
"loss": 0.1519, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 9.522693635915147e-05, |
|
"loss": 0.1844, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 9.494326591021215e-05, |
|
"loss": 0.1493, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 9.465959546127282e-05, |
|
"loss": 0.155, |
|
"step": 2668 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 9.43759250123335e-05, |
|
"loss": 0.153, |
|
"step": 2714 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 9.409225456339417e-05, |
|
"loss": 0.157, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 9.380858411445486e-05, |
|
"loss": 0.1439, |
|
"step": 2806 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 9.352491366551554e-05, |
|
"loss": 0.1429, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 9.324124321657623e-05, |
|
"loss": 0.1609, |
|
"step": 2898 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 9.29575727676369e-05, |
|
"loss": 0.1664, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 9.26739023186976e-05, |
|
"loss": 0.1534, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 9.239023186975827e-05, |
|
"loss": 0.1518, |
|
"step": 3036 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 9.210656142081895e-05, |
|
"loss": 0.1435, |
|
"step": 3082 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 9.182289097187962e-05, |
|
"loss": 0.1335, |
|
"step": 3128 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 9.15392205229403e-05, |
|
"loss": 0.1231, |
|
"step": 3174 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 9.125555007400099e-05, |
|
"loss": 0.1237, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 9.097187962506168e-05, |
|
"loss": 0.1312, |
|
"step": 3266 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 9.068820917612236e-05, |
|
"loss": 0.133, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 9.040453872718303e-05, |
|
"loss": 0.1324, |
|
"step": 3358 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 9.012086827824372e-05, |
|
"loss": 0.1232, |
|
"step": 3404 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 8.98371978293044e-05, |
|
"loss": 0.1104, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 8.955352738036507e-05, |
|
"loss": 0.1166, |
|
"step": 3496 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 8.926985693142575e-05, |
|
"loss": 0.1114, |
|
"step": 3542 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 8.898618648248643e-05, |
|
"loss": 0.1276, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 8.870251603354712e-05, |
|
"loss": 0.127, |
|
"step": 3634 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 8.84188455846078e-05, |
|
"loss": 0.1255, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 8.813517513566848e-05, |
|
"loss": 0.1269, |
|
"step": 3726 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 8.785150468672916e-05, |
|
"loss": 0.1339, |
|
"step": 3772 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 8.756783423778983e-05, |
|
"loss": 0.1326, |
|
"step": 3818 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 8.728416378885052e-05, |
|
"loss": 0.1203, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 8.70004933399112e-05, |
|
"loss": 0.1026, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 8.671682289097188e-05, |
|
"loss": 0.1252, |
|
"step": 3956 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 8.643315244203257e-05, |
|
"loss": 0.1052, |
|
"step": 4002 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 8.614948199309326e-05, |
|
"loss": 0.1188, |
|
"step": 4048 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 8.586581154415393e-05, |
|
"loss": 0.1113, |
|
"step": 4094 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 8.558214109521461e-05, |
|
"loss": 0.0934, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 8.529847064627528e-05, |
|
"loss": 0.1168, |
|
"step": 4186 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 8.501480019733596e-05, |
|
"loss": 0.1113, |
|
"step": 4232 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 8.473112974839665e-05, |
|
"loss": 0.0944, |
|
"step": 4278 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 8.444745929945733e-05, |
|
"loss": 0.0956, |
|
"step": 4324 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 8.416378885051802e-05, |
|
"loss": 0.092, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 8.388011840157869e-05, |
|
"loss": 0.0899, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 8.359644795263937e-05, |
|
"loss": 0.111, |
|
"step": 4462 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 8.331277750370006e-05, |
|
"loss": 0.114, |
|
"step": 4508 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 8.302910705476073e-05, |
|
"loss": 0.0906, |
|
"step": 4554 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 8.274543660582141e-05, |
|
"loss": 0.1053, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 8.246176615688209e-05, |
|
"loss": 0.0983, |
|
"step": 4646 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 8.217809570794278e-05, |
|
"loss": 0.0864, |
|
"step": 4692 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 8.189442525900347e-05, |
|
"loss": 0.0857, |
|
"step": 4738 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 8.161075481006414e-05, |
|
"loss": 0.0979, |
|
"step": 4784 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 8.132708436112482e-05, |
|
"loss": 0.0919, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 8.10434139121855e-05, |
|
"loss": 0.0758, |
|
"step": 4876 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 8.075974346324618e-05, |
|
"loss": 0.0948, |
|
"step": 4922 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 8.047607301430686e-05, |
|
"loss": 0.0856, |
|
"step": 4968 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 8.019240256536754e-05, |
|
"loss": 0.0677, |
|
"step": 5014 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 7.990873211642823e-05, |
|
"loss": 0.0855, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 7.96250616674889e-05, |
|
"loss": 0.0896, |
|
"step": 5106 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 7.934139121854959e-05, |
|
"loss": 0.0999, |
|
"step": 5152 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 7.905772076961027e-05, |
|
"loss": 0.0866, |
|
"step": 5198 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 7.877405032067094e-05, |
|
"loss": 0.0821, |
|
"step": 5244 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 7.849037987173162e-05, |
|
"loss": 0.0826, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 7.820670942279231e-05, |
|
"loss": 0.0872, |
|
"step": 5336 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 7.792303897385299e-05, |
|
"loss": 0.1113, |
|
"step": 5382 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 7.763936852491366e-05, |
|
"loss": 0.0628, |
|
"step": 5428 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 7.735569807597435e-05, |
|
"loss": 0.0747, |
|
"step": 5474 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 7.707202762703503e-05, |
|
"loss": 0.0653, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 7.678835717809572e-05, |
|
"loss": 0.0852, |
|
"step": 5566 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 7.65046867291564e-05, |
|
"loss": 0.0661, |
|
"step": 5612 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 7.622101628021707e-05, |
|
"loss": 0.0683, |
|
"step": 5658 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 7.593734583127775e-05, |
|
"loss": 0.0865, |
|
"step": 5704 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 7.565367538233844e-05, |
|
"loss": 0.0722, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 7.537000493339911e-05, |
|
"loss": 0.0802, |
|
"step": 5796 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 7.50863344844598e-05, |
|
"loss": 0.0693, |
|
"step": 5842 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 7.480266403552048e-05, |
|
"loss": 0.0817, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 7.451899358658115e-05, |
|
"loss": 0.0844, |
|
"step": 5934 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 7.423532313764184e-05, |
|
"loss": 0.0803, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 7.395165268870252e-05, |
|
"loss": 0.0815, |
|
"step": 6026 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 7.36679822397632e-05, |
|
"loss": 0.0871, |
|
"step": 6072 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 7.338431179082387e-05, |
|
"loss": 0.0763, |
|
"step": 6118 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 7.310064134188456e-05, |
|
"loss": 0.065, |
|
"step": 6164 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 7.281697089294525e-05, |
|
"loss": 0.0629, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 7.253330044400593e-05, |
|
"loss": 0.0712, |
|
"step": 6256 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 7.22496299950666e-05, |
|
"loss": 0.0645, |
|
"step": 6302 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 7.196595954612728e-05, |
|
"loss": 0.0725, |
|
"step": 6348 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 7.168228909718797e-05, |
|
"loss": 0.0573, |
|
"step": 6394 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 7.139861864824865e-05, |
|
"loss": 0.0621, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 7.111494819930932e-05, |
|
"loss": 0.06, |
|
"step": 6486 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 7.083127775037001e-05, |
|
"loss": 0.0646, |
|
"step": 6532 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 7.054760730143069e-05, |
|
"loss": 0.0575, |
|
"step": 6578 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 7.026393685249138e-05, |
|
"loss": 0.0634, |
|
"step": 6624 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 6.998026640355205e-05, |
|
"loss": 0.0568, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 6.969659595461273e-05, |
|
"loss": 0.0652, |
|
"step": 6716 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 6.941292550567341e-05, |
|
"loss": 0.0795, |
|
"step": 6762 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 6.912925505673408e-05, |
|
"loss": 0.064, |
|
"step": 6808 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 6.884558460779477e-05, |
|
"loss": 0.0846, |
|
"step": 6854 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 6.856191415885546e-05, |
|
"loss": 0.0587, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 6.827824370991614e-05, |
|
"loss": 0.0663, |
|
"step": 6946 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 6.799457326097681e-05, |
|
"loss": 0.0534, |
|
"step": 6992 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 6.77109028120375e-05, |
|
"loss": 0.0466, |
|
"step": 7038 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 6.742723236309818e-05, |
|
"loss": 0.0529, |
|
"step": 7084 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 6.714356191415886e-05, |
|
"loss": 0.0595, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 6.685989146521953e-05, |
|
"loss": 0.0547, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 6.657622101628021e-05, |
|
"loss": 0.0571, |
|
"step": 7222 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 6.62925505673409e-05, |
|
"loss": 0.0601, |
|
"step": 7268 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 6.600888011840159e-05, |
|
"loss": 0.0508, |
|
"step": 7314 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 6.572520966946226e-05, |
|
"loss": 0.0572, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 6.544153922052294e-05, |
|
"loss": 0.0591, |
|
"step": 7406 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 6.515786877158362e-05, |
|
"loss": 0.0708, |
|
"step": 7452 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 6.487419832264431e-05, |
|
"loss": 0.0711, |
|
"step": 7498 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 6.459052787370498e-05, |
|
"loss": 0.0514, |
|
"step": 7544 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 6.430685742476566e-05, |
|
"loss": 0.0607, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 6.402318697582635e-05, |
|
"loss": 0.0491, |
|
"step": 7636 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 6.373951652688704e-05, |
|
"loss": 0.0577, |
|
"step": 7682 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 6.345584607794771e-05, |
|
"loss": 0.0416, |
|
"step": 7728 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"learning_rate": 6.317217562900839e-05, |
|
"loss": 0.0572, |
|
"step": 7774 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 6.288850518006907e-05, |
|
"loss": 0.0554, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 6.260483473112974e-05, |
|
"loss": 0.0457, |
|
"step": 7866 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 6.232116428219043e-05, |
|
"loss": 0.0528, |
|
"step": 7912 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 6.203749383325111e-05, |
|
"loss": 0.0524, |
|
"step": 7958 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"learning_rate": 6.17538233843118e-05, |
|
"loss": 0.0631, |
|
"step": 8004 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 6.147015293537247e-05, |
|
"loss": 0.042, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 6.118648248643315e-05, |
|
"loss": 0.0422, |
|
"step": 8096 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 6.090281203749384e-05, |
|
"loss": 0.0511, |
|
"step": 8142 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 6.061914158855452e-05, |
|
"loss": 0.0526, |
|
"step": 8188 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 6.033547113961519e-05, |
|
"loss": 0.0454, |
|
"step": 8234 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 6.0051800690675876e-05, |
|
"loss": 0.0462, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 5.9768130241736566e-05, |
|
"loss": 0.0499, |
|
"step": 8326 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 5.948445979279724e-05, |
|
"loss": 0.0444, |
|
"step": 8372 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 5.920078934385792e-05, |
|
"loss": 0.0462, |
|
"step": 8418 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 5.89171188949186e-05, |
|
"loss": 0.0443, |
|
"step": 8464 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 5.863344844597928e-05, |
|
"loss": 0.0394, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 5.834977799703997e-05, |
|
"loss": 0.0463, |
|
"step": 8556 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 5.806610754810064e-05, |
|
"loss": 0.0396, |
|
"step": 8602 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 5.7782437099161326e-05, |
|
"loss": 0.0409, |
|
"step": 8648 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 5.7498766650222e-05, |
|
"loss": 0.0407, |
|
"step": 8694 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 5.721509620128269e-05, |
|
"loss": 0.0442, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 5.693142575234337e-05, |
|
"loss": 0.0437, |
|
"step": 8786 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 5.664775530340405e-05, |
|
"loss": 0.0368, |
|
"step": 8832 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 5.636408485446473e-05, |
|
"loss": 0.0418, |
|
"step": 8878 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 5.60804144055254e-05, |
|
"loss": 0.0412, |
|
"step": 8924 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 5.579674395658609e-05, |
|
"loss": 0.0385, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 13.38, |
|
"learning_rate": 5.5513073507646776e-05, |
|
"loss": 0.0469, |
|
"step": 9016 |
|
}, |
|
{ |
|
"epoch": 13.58, |
|
"learning_rate": 5.522940305870745e-05, |
|
"loss": 0.0402, |
|
"step": 9062 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 5.494573260976813e-05, |
|
"loss": 0.0514, |
|
"step": 9108 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 5.466206216082881e-05, |
|
"loss": 0.045, |
|
"step": 9154 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 5.43783917118895e-05, |
|
"loss": 0.0523, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"learning_rate": 5.409472126295018e-05, |
|
"loss": 0.0338, |
|
"step": 9246 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 5.381105081401085e-05, |
|
"loss": 0.0358, |
|
"step": 9292 |
|
}, |
|
{ |
|
"epoch": 12.47, |
|
"learning_rate": 5.3527380365071536e-05, |
|
"loss": 0.0387, |
|
"step": 9338 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 5.3243709916132226e-05, |
|
"loss": 0.0289, |
|
"step": 9384 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 5.29600394671929e-05, |
|
"loss": 0.0396, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 5.267636901825358e-05, |
|
"loss": 0.0426, |
|
"step": 9476 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 5.239269856931426e-05, |
|
"loss": 0.0345, |
|
"step": 9522 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 5.210902812037494e-05, |
|
"loss": 0.0394, |
|
"step": 9568 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 5.182535767143563e-05, |
|
"loss": 0.0349, |
|
"step": 9614 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 5.15416872224963e-05, |
|
"loss": 0.0322, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 5.1258016773556986e-05, |
|
"loss": 0.0372, |
|
"step": 9706 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 5.097434632461766e-05, |
|
"loss": 0.0334, |
|
"step": 9752 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 5.069067587567834e-05, |
|
"loss": 0.0375, |
|
"step": 9798 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 5.040700542673903e-05, |
|
"loss": 0.0396, |
|
"step": 9844 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"learning_rate": 5.0123334977799704e-05, |
|
"loss": 0.0356, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 4.983966452886039e-05, |
|
"loss": 0.0458, |
|
"step": 9936 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 4.955599407992107e-05, |
|
"loss": 0.036, |
|
"step": 9982 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 4.9272323630981746e-05, |
|
"loss": 0.028, |
|
"step": 10028 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 4.898865318204243e-05, |
|
"loss": 0.0229, |
|
"step": 10074 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 4.870498273310311e-05, |
|
"loss": 0.0353, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 4.842131228416379e-05, |
|
"loss": 0.0278, |
|
"step": 10166 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"learning_rate": 4.813764183522447e-05, |
|
"loss": 0.0296, |
|
"step": 10212 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 4.7853971386285154e-05, |
|
"loss": 0.0284, |
|
"step": 10258 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 4.757030093734584e-05, |
|
"loss": 0.0299, |
|
"step": 10304 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 4.728663048840651e-05, |
|
"loss": 0.0309, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 4.7002960039467196e-05, |
|
"loss": 0.0283, |
|
"step": 10396 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 4.671928959052788e-05, |
|
"loss": 0.0349, |
|
"step": 10442 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 4.6435619141588555e-05, |
|
"loss": 0.0343, |
|
"step": 10488 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 4.615194869264924e-05, |
|
"loss": 0.0347, |
|
"step": 10534 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 4.5868278243709914e-05, |
|
"loss": 0.035, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 4.55846077947706e-05, |
|
"loss": 0.0284, |
|
"step": 10626 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 4.530093734583128e-05, |
|
"loss": 0.0408, |
|
"step": 10672 |
|
}, |
|
{ |
|
"epoch": 16.08, |
|
"learning_rate": 4.501726689689196e-05, |
|
"loss": 0.0386, |
|
"step": 10718 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 4.473359644795264e-05, |
|
"loss": 0.0288, |
|
"step": 10764 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 4.444992599901332e-05, |
|
"loss": 0.0274, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 4.4166255550074005e-05, |
|
"loss": 0.0268, |
|
"step": 10856 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 4.388258510113468e-05, |
|
"loss": 0.0206, |
|
"step": 10902 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 4.3598914652195364e-05, |
|
"loss": 0.0281, |
|
"step": 10948 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 4.331524420325605e-05, |
|
"loss": 0.0242, |
|
"step": 10994 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 4.303157375431673e-05, |
|
"loss": 0.0246, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 4.2747903305377406e-05, |
|
"loss": 0.0275, |
|
"step": 11086 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 4.246423285643808e-05, |
|
"loss": 0.0259, |
|
"step": 11132 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 4.218056240749877e-05, |
|
"loss": 0.0251, |
|
"step": 11178 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"learning_rate": 4.189689195855945e-05, |
|
"loss": 0.0229, |
|
"step": 11224 |
|
}, |
|
{ |
|
"epoch": 16.15, |
|
"learning_rate": 4.161322150962013e-05, |
|
"loss": 0.0257, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 16.35, |
|
"learning_rate": 4.132955106068081e-05, |
|
"loss": 0.0269, |
|
"step": 11316 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"learning_rate": 4.10458806117415e-05, |
|
"loss": 0.0344, |
|
"step": 11362 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 4.0762210162802173e-05, |
|
"loss": 0.0288, |
|
"step": 11408 |
|
}, |
|
{ |
|
"epoch": 16.94, |
|
"learning_rate": 4.047853971386285e-05, |
|
"loss": 0.0287, |
|
"step": 11454 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 4.019486926492353e-05, |
|
"loss": 0.0315, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 15.05, |
|
"learning_rate": 3.9911198815984215e-05, |
|
"loss": 0.0317, |
|
"step": 11546 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 3.96275283670449e-05, |
|
"loss": 0.02, |
|
"step": 11592 |
|
}, |
|
{ |
|
"epoch": 15.44, |
|
"learning_rate": 3.9343857918105575e-05, |
|
"loss": 0.0215, |
|
"step": 11638 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 3.906018746916626e-05, |
|
"loss": 0.0276, |
|
"step": 11684 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 3.877651702022694e-05, |
|
"loss": 0.0207, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 3.8492846571287617e-05, |
|
"loss": 0.0217, |
|
"step": 11776 |
|
}, |
|
{ |
|
"epoch": 16.23, |
|
"learning_rate": 3.82091761223483e-05, |
|
"loss": 0.0254, |
|
"step": 11822 |
|
}, |
|
{ |
|
"epoch": 16.42, |
|
"learning_rate": 3.7925505673408976e-05, |
|
"loss": 0.0245, |
|
"step": 11868 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"learning_rate": 3.7641835224469665e-05, |
|
"loss": 0.0275, |
|
"step": 11914 |
|
}, |
|
{ |
|
"epoch": 16.82, |
|
"learning_rate": 3.735816477553034e-05, |
|
"loss": 0.0261, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 3.7074494326591025e-05, |
|
"loss": 0.0186, |
|
"step": 12006 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 3.67908238776517e-05, |
|
"loss": 0.0186, |
|
"step": 12052 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 3.650715342871239e-05, |
|
"loss": 0.0221, |
|
"step": 12098 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 3.6223482979773067e-05, |
|
"loss": 0.0199, |
|
"step": 12144 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"learning_rate": 3.593981253083374e-05, |
|
"loss": 0.0241, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.5656142081894426e-05, |
|
"loss": 0.0224, |
|
"step": 12236 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 3.537247163295511e-05, |
|
"loss": 0.0236, |
|
"step": 12282 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 3.508880118401579e-05, |
|
"loss": 0.0206, |
|
"step": 12328 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 3.480513073507647e-05, |
|
"loss": 0.0167, |
|
"step": 12374 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 3.452146028613715e-05, |
|
"loss": 0.0209, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 16.69, |
|
"learning_rate": 3.4237789837197834e-05, |
|
"loss": 0.0188, |
|
"step": 12466 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"learning_rate": 3.395411938825851e-05, |
|
"loss": 0.0189, |
|
"step": 12512 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 3.367044893931919e-05, |
|
"loss": 0.0229, |
|
"step": 12558 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 3.338677849037987e-05, |
|
"loss": 0.0186, |
|
"step": 12604 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 3.310310804144056e-05, |
|
"loss": 0.0218, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"learning_rate": 3.2819437592501235e-05, |
|
"loss": 0.0165, |
|
"step": 12696 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"learning_rate": 3.253576714356192e-05, |
|
"loss": 0.0175, |
|
"step": 12742 |
|
}, |
|
{ |
|
"epoch": 18.07, |
|
"learning_rate": 3.2252096694622594e-05, |
|
"loss": 0.0159, |
|
"step": 12788 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 3.196842624568328e-05, |
|
"loss": 0.0174, |
|
"step": 12834 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 3.168475579674396e-05, |
|
"loss": 0.0195, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 3.1401085347804636e-05, |
|
"loss": 0.0203, |
|
"step": 12926 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"learning_rate": 3.111741489886532e-05, |
|
"loss": 0.019, |
|
"step": 12972 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 3.0833744449926e-05, |
|
"loss": 0.0197, |
|
"step": 13018 |
|
}, |
|
{ |
|
"epoch": 19.25, |
|
"learning_rate": 3.0550074000986685e-05, |
|
"loss": 0.0221, |
|
"step": 13064 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 3.026640355204736e-05, |
|
"loss": 0.0191, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 2.998273310310804e-05, |
|
"loss": 0.0138, |
|
"step": 13156 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 2.9699062654168723e-05, |
|
"loss": 0.0149, |
|
"step": 13202 |
|
}, |
|
{ |
|
"epoch": 17.75, |
|
"learning_rate": 2.9415392205229403e-05, |
|
"loss": 0.018, |
|
"step": 13248 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"learning_rate": 2.9131721756290086e-05, |
|
"loss": 0.0162, |
|
"step": 13294 |
|
}, |
|
{ |
|
"epoch": 18.14, |
|
"learning_rate": 2.8848051307350765e-05, |
|
"loss": 0.0153, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"learning_rate": 2.8564380858411448e-05, |
|
"loss": 0.0141, |
|
"step": 13386 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 2.8280710409472128e-05, |
|
"loss": 0.013, |
|
"step": 13432 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"learning_rate": 2.7997039960532807e-05, |
|
"loss": 0.0184, |
|
"step": 13478 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"learning_rate": 2.771336951159349e-05, |
|
"loss": 0.0149, |
|
"step": 13524 |
|
}, |
|
{ |
|
"epoch": 19.12, |
|
"learning_rate": 2.742969906265417e-05, |
|
"loss": 0.0159, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 19.32, |
|
"learning_rate": 2.7146028613714853e-05, |
|
"loss": 0.0171, |
|
"step": 13616 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 2.6862358164775532e-05, |
|
"loss": 0.0174, |
|
"step": 13662 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 2.6578687715836215e-05, |
|
"loss": 0.0159, |
|
"step": 13708 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 2.629501726689689e-05, |
|
"loss": 0.0202, |
|
"step": 13754 |
|
}, |
|
{ |
|
"epoch": 20.11, |
|
"learning_rate": 2.601134681795757e-05, |
|
"loss": 0.0182, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 2.5727676369018254e-05, |
|
"loss": 0.0138, |
|
"step": 13846 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 2.5444005920078933e-05, |
|
"loss": 0.0136, |
|
"step": 13892 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 2.5160335471139616e-05, |
|
"loss": 0.0135, |
|
"step": 13938 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 2.4876665022200296e-05, |
|
"loss": 0.0125, |
|
"step": 13984 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 2.459299457326098e-05, |
|
"loss": 0.014, |
|
"step": 14030 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2.430932412432166e-05, |
|
"loss": 0.0137, |
|
"step": 14076 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 2.402565367538234e-05, |
|
"loss": 0.0162, |
|
"step": 14122 |
|
}, |
|
{ |
|
"epoch": 19.39, |
|
"learning_rate": 2.374198322644302e-05, |
|
"loss": 0.0134, |
|
"step": 14168 |
|
}, |
|
{ |
|
"epoch": 19.59, |
|
"learning_rate": 2.34583127775037e-05, |
|
"loss": 0.0113, |
|
"step": 14214 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 2.3174642328564383e-05, |
|
"loss": 0.0134, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 19.98, |
|
"learning_rate": 2.2890971879625063e-05, |
|
"loss": 0.0135, |
|
"step": 14306 |
|
}, |
|
{ |
|
"epoch": 20.18, |
|
"learning_rate": 2.2607301430685742e-05, |
|
"loss": 0.0132, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 20.38, |
|
"learning_rate": 2.2323630981746425e-05, |
|
"loss": 0.015, |
|
"step": 14398 |
|
}, |
|
{ |
|
"epoch": 20.57, |
|
"learning_rate": 2.2039960532807105e-05, |
|
"loss": 0.0117, |
|
"step": 14444 |
|
}, |
|
{ |
|
"epoch": 20.77, |
|
"learning_rate": 2.1756290083867788e-05, |
|
"loss": 0.0148, |
|
"step": 14490 |
|
}, |
|
{ |
|
"epoch": 20.97, |
|
"learning_rate": 2.1472619634928467e-05, |
|
"loss": 0.0168, |
|
"step": 14536 |
|
}, |
|
{ |
|
"epoch": 21.16, |
|
"learning_rate": 2.118894918598915e-05, |
|
"loss": 0.0139, |
|
"step": 14582 |
|
}, |
|
{ |
|
"epoch": 19.07, |
|
"learning_rate": 2.0905278737049827e-05, |
|
"loss": 0.0112, |
|
"step": 14628 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"learning_rate": 2.062160828811051e-05, |
|
"loss": 0.0094, |
|
"step": 14674 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"learning_rate": 2.033793783917119e-05, |
|
"loss": 0.0092, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 19.66, |
|
"learning_rate": 2.0054267390231872e-05, |
|
"loss": 0.0111, |
|
"step": 14766 |
|
}, |
|
{ |
|
"epoch": 19.86, |
|
"learning_rate": 1.977059694129255e-05, |
|
"loss": 0.0112, |
|
"step": 14812 |
|
}, |
|
{ |
|
"epoch": 20.06, |
|
"learning_rate": 1.9486926492353234e-05, |
|
"loss": 0.0127, |
|
"step": 14858 |
|
}, |
|
{ |
|
"epoch": 20.25, |
|
"learning_rate": 1.9203256043413914e-05, |
|
"loss": 0.0105, |
|
"step": 14904 |
|
}, |
|
{ |
|
"epoch": 20.45, |
|
"learning_rate": 1.8919585594474594e-05, |
|
"loss": 0.0142, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 1.8635915145535273e-05, |
|
"loss": 0.0106, |
|
"step": 14996 |
|
}, |
|
{ |
|
"epoch": 20.84, |
|
"learning_rate": 1.8352244696595956e-05, |
|
"loss": 0.0115, |
|
"step": 15042 |
|
}, |
|
{ |
|
"epoch": 21.04, |
|
"learning_rate": 1.8068574247656636e-05, |
|
"loss": 0.0114, |
|
"step": 15088 |
|
}, |
|
{ |
|
"epoch": 21.24, |
|
"learning_rate": 1.778490379871732e-05, |
|
"loss": 0.012, |
|
"step": 15134 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"learning_rate": 1.7501233349777998e-05, |
|
"loss": 0.0111, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"learning_rate": 1.721756290083868e-05, |
|
"loss": 0.0127, |
|
"step": 15226 |
|
}, |
|
{ |
|
"epoch": 21.82, |
|
"learning_rate": 1.693389245189936e-05, |
|
"loss": 0.0108, |
|
"step": 15272 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"learning_rate": 1.665022200296004e-05, |
|
"loss": 0.0123, |
|
"step": 15318 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"learning_rate": 1.636655155402072e-05, |
|
"loss": 0.0116, |
|
"step": 15364 |
|
}, |
|
{ |
|
"epoch": 20.13, |
|
"learning_rate": 1.6082881105081403e-05, |
|
"loss": 0.0115, |
|
"step": 15410 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 1.5799210656142082e-05, |
|
"loss": 0.0083, |
|
"step": 15456 |
|
}, |
|
{ |
|
"epoch": 20.52, |
|
"learning_rate": 1.5515540207202765e-05, |
|
"loss": 0.0094, |
|
"step": 15502 |
|
}, |
|
{ |
|
"epoch": 20.72, |
|
"learning_rate": 1.5231869758263445e-05, |
|
"loss": 0.0087, |
|
"step": 15548 |
|
}, |
|
{ |
|
"epoch": 20.91, |
|
"learning_rate": 1.4948199309324126e-05, |
|
"loss": 0.0119, |
|
"step": 15594 |
|
}, |
|
{ |
|
"epoch": 21.11, |
|
"learning_rate": 1.4664528860384805e-05, |
|
"loss": 0.009, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 21.31, |
|
"learning_rate": 1.4380858411445487e-05, |
|
"loss": 0.0113, |
|
"step": 15686 |
|
}, |
|
{ |
|
"epoch": 21.5, |
|
"learning_rate": 1.4097187962506166e-05, |
|
"loss": 0.0096, |
|
"step": 15732 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"learning_rate": 1.3813517513566847e-05, |
|
"loss": 0.0103, |
|
"step": 15778 |
|
}, |
|
{ |
|
"epoch": 21.9, |
|
"learning_rate": 1.3529847064627529e-05, |
|
"loss": 0.0089, |
|
"step": 15824 |
|
}, |
|
{ |
|
"epoch": 22.09, |
|
"learning_rate": 1.324617661568821e-05, |
|
"loss": 0.0082, |
|
"step": 15870 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"learning_rate": 1.2962506166748891e-05, |
|
"loss": 0.01, |
|
"step": 15916 |
|
}, |
|
{ |
|
"epoch": 22.49, |
|
"learning_rate": 1.267883571780957e-05, |
|
"loss": 0.0084, |
|
"step": 15962 |
|
}, |
|
{ |
|
"epoch": 22.68, |
|
"learning_rate": 1.2395165268870252e-05, |
|
"loss": 0.0093, |
|
"step": 16008 |
|
}, |
|
{ |
|
"epoch": 22.88, |
|
"learning_rate": 1.2111494819930933e-05, |
|
"loss": 0.0104, |
|
"step": 16054 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 1.1827824370991614e-05, |
|
"loss": 0.0117, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"learning_rate": 1.1544153922052296e-05, |
|
"loss": 0.0095, |
|
"step": 16146 |
|
}, |
|
{ |
|
"epoch": 21.18, |
|
"learning_rate": 1.1260483473112975e-05, |
|
"loss": 0.0085, |
|
"step": 16192 |
|
}, |
|
{ |
|
"epoch": 21.38, |
|
"learning_rate": 1.0976813024173656e-05, |
|
"loss": 0.0083, |
|
"step": 16238 |
|
}, |
|
{ |
|
"epoch": 21.58, |
|
"learning_rate": 1.0693142575234338e-05, |
|
"loss": 0.0081, |
|
"step": 16284 |
|
}, |
|
{ |
|
"epoch": 21.77, |
|
"learning_rate": 1.0409472126295019e-05, |
|
"loss": 0.0068, |
|
"step": 16330 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"learning_rate": 1.0125801677355699e-05, |
|
"loss": 0.0098, |
|
"step": 16376 |
|
}, |
|
{ |
|
"epoch": 22.17, |
|
"learning_rate": 9.84213122841638e-06, |
|
"loss": 0.0087, |
|
"step": 16422 |
|
}, |
|
{ |
|
"epoch": 22.36, |
|
"learning_rate": 9.558460779477061e-06, |
|
"loss": 0.007, |
|
"step": 16468 |
|
}, |
|
{ |
|
"epoch": 22.56, |
|
"learning_rate": 9.274790330537742e-06, |
|
"loss": 0.0085, |
|
"step": 16514 |
|
}, |
|
{ |
|
"epoch": 22.76, |
|
"learning_rate": 8.991119881598422e-06, |
|
"loss": 0.0094, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"learning_rate": 8.707449432659103e-06, |
|
"loss": 0.0077, |
|
"step": 16606 |
|
}, |
|
{ |
|
"epoch": 23.15, |
|
"learning_rate": 8.423778983719784e-06, |
|
"loss": 0.0077, |
|
"step": 16652 |
|
}, |
|
{ |
|
"epoch": 23.35, |
|
"learning_rate": 8.140108534780466e-06, |
|
"loss": 0.008, |
|
"step": 16698 |
|
}, |
|
{ |
|
"epoch": 23.54, |
|
"learning_rate": 7.856438085841145e-06, |
|
"loss": 0.0088, |
|
"step": 16744 |
|
}, |
|
{ |
|
"epoch": 23.74, |
|
"learning_rate": 7.5727676369018255e-06, |
|
"loss": 0.0078, |
|
"step": 16790 |
|
}, |
|
{ |
|
"epoch": 23.94, |
|
"learning_rate": 7.289097187962507e-06, |
|
"loss": 0.0095, |
|
"step": 16836 |
|
}, |
|
{ |
|
"epoch": 24.13, |
|
"learning_rate": 7.005426739023187e-06, |
|
"loss": 0.0079, |
|
"step": 16882 |
|
}, |
|
{ |
|
"epoch": 22.04, |
|
"learning_rate": 6.721756290083868e-06, |
|
"loss": 0.0068, |
|
"step": 16928 |
|
}, |
|
{ |
|
"epoch": 22.24, |
|
"learning_rate": 6.438085841144549e-06, |
|
"loss": 0.0074, |
|
"step": 16974 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 6.15441539220523e-06, |
|
"loss": 0.0095, |
|
"step": 17020 |
|
}, |
|
{ |
|
"epoch": 22.63, |
|
"learning_rate": 5.87074494326591e-06, |
|
"loss": 0.007, |
|
"step": 17066 |
|
}, |
|
{ |
|
"epoch": 22.83, |
|
"learning_rate": 5.587074494326592e-06, |
|
"loss": 0.0059, |
|
"step": 17112 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"learning_rate": 5.303404045387272e-06, |
|
"loss": 0.0075, |
|
"step": 17158 |
|
}, |
|
{ |
|
"epoch": 23.22, |
|
"learning_rate": 5.019733596447953e-06, |
|
"loss": 0.0069, |
|
"step": 17204 |
|
}, |
|
{ |
|
"epoch": 23.42, |
|
"learning_rate": 4.736063147508634e-06, |
|
"loss": 0.0068, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 23.62, |
|
"learning_rate": 4.452392698569315e-06, |
|
"loss": 0.0079, |
|
"step": 17296 |
|
}, |
|
{ |
|
"epoch": 23.81, |
|
"learning_rate": 4.168722249629995e-06, |
|
"loss": 0.0068, |
|
"step": 17342 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 3.8850518006906765e-06, |
|
"loss": 0.0074, |
|
"step": 17388 |
|
}, |
|
{ |
|
"epoch": 24.21, |
|
"learning_rate": 3.6013813517513565e-06, |
|
"loss": 0.0068, |
|
"step": 17434 |
|
}, |
|
{ |
|
"epoch": 24.4, |
|
"learning_rate": 3.3177109028120377e-06, |
|
"loss": 0.0076, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"learning_rate": 3.0340404538727186e-06, |
|
"loss": 0.0063, |
|
"step": 17526 |
|
}, |
|
{ |
|
"epoch": 24.79, |
|
"learning_rate": 2.7503700049333994e-06, |
|
"loss": 0.0066, |
|
"step": 17572 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"learning_rate": 2.46669955599408e-06, |
|
"loss": 0.0077, |
|
"step": 17618 |
|
}, |
|
{ |
|
"epoch": 25.19, |
|
"learning_rate": 2.183029107054761e-06, |
|
"loss": 0.0077, |
|
"step": 17664 |
|
}, |
|
{ |
|
"epoch": 23.1, |
|
"learning_rate": 1.8993586581154416e-06, |
|
"loss": 0.0065, |
|
"step": 17710 |
|
}, |
|
{ |
|
"epoch": 23.29, |
|
"learning_rate": 1.6156882091761224e-06, |
|
"loss": 0.007, |
|
"step": 17756 |
|
}, |
|
{ |
|
"epoch": 23.49, |
|
"learning_rate": 1.3320177602368033e-06, |
|
"loss": 0.0064, |
|
"step": 17802 |
|
}, |
|
{ |
|
"epoch": 23.69, |
|
"learning_rate": 1.048347311297484e-06, |
|
"loss": 0.0066, |
|
"step": 17848 |
|
}, |
|
{ |
|
"epoch": 23.88, |
|
"learning_rate": 7.646768623581648e-07, |
|
"loss": 0.0057, |
|
"step": 17894 |
|
}, |
|
{ |
|
"epoch": 24.08, |
|
"learning_rate": 4.810064134188456e-07, |
|
"loss": 0.0063, |
|
"step": 17940 |
|
}, |
|
{ |
|
"epoch": 24.28, |
|
"learning_rate": 1.973359644795264e-07, |
|
"loss": 0.0057, |
|
"step": 17986 |
|
} |
|
], |
|
"logging_steps": 46, |
|
"max_steps": 18018, |
|
"num_train_epochs": 77, |
|
"save_steps": 500, |
|
"total_flos": 7.874587036609413e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|