|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 200, |
|
"global_step": 1124, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0017793594306049821, |
|
"grad_norm": 1.8633774216343981, |
|
"learning_rate": 9.99998046979289e-06, |
|
"loss": 0.0532, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0035587188612099642, |
|
"grad_norm": 1.6781114480056516, |
|
"learning_rate": 9.999921879324127e-06, |
|
"loss": 0.0593, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.005338078291814947, |
|
"grad_norm": 2.1381878710549427, |
|
"learning_rate": 9.999824229051425e-06, |
|
"loss": 0.0748, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0071174377224199285, |
|
"grad_norm": 3.4540885707757574, |
|
"learning_rate": 9.999687519737639e-06, |
|
"loss": 0.0783, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.008896797153024912, |
|
"grad_norm": 2.6168368469978414, |
|
"learning_rate": 9.99951175245075e-06, |
|
"loss": 0.0864, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.010676156583629894, |
|
"grad_norm": 2.464541157320704, |
|
"learning_rate": 9.999296928563868e-06, |
|
"loss": 0.0783, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.012455516014234875, |
|
"grad_norm": 3.242715069848123, |
|
"learning_rate": 9.999043049755216e-06, |
|
"loss": 0.1087, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.014234875444839857, |
|
"grad_norm": 1.720576187595845, |
|
"learning_rate": 9.998750118008117e-06, |
|
"loss": 0.0489, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01601423487544484, |
|
"grad_norm": 2.1886717504306152, |
|
"learning_rate": 9.998418135610974e-06, |
|
"loss": 0.0785, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.017793594306049824, |
|
"grad_norm": 2.248124937227808, |
|
"learning_rate": 9.998047105157265e-06, |
|
"loss": 0.0834, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.019572953736654804, |
|
"grad_norm": 1.9632033387778847, |
|
"learning_rate": 9.997637029545509e-06, |
|
"loss": 0.0535, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.021352313167259787, |
|
"grad_norm": 2.5175798942323135, |
|
"learning_rate": 9.997187911979252e-06, |
|
"loss": 0.0858, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.023131672597864767, |
|
"grad_norm": 4.817015009388178, |
|
"learning_rate": 9.996699755967035e-06, |
|
"loss": 0.1203, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.02491103202846975, |
|
"grad_norm": 2.98510519832645, |
|
"learning_rate": 9.996172565322375e-06, |
|
"loss": 0.107, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.026690391459074734, |
|
"grad_norm": 2.3533153659129624, |
|
"learning_rate": 9.995606344163728e-06, |
|
"loss": 0.093, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.028469750889679714, |
|
"grad_norm": 1.926714880171991, |
|
"learning_rate": 9.995001096914462e-06, |
|
"loss": 0.0888, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.030249110320284697, |
|
"grad_norm": 2.2338054352391787, |
|
"learning_rate": 9.994356828302818e-06, |
|
"loss": 0.1246, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03202846975088968, |
|
"grad_norm": 1.4526929567184619, |
|
"learning_rate": 9.993673543361874e-06, |
|
"loss": 0.0789, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.033807829181494664, |
|
"grad_norm": 2.2274260607163923, |
|
"learning_rate": 9.992951247429512e-06, |
|
"loss": 0.1174, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03558718861209965, |
|
"grad_norm": 1.5830224818924863, |
|
"learning_rate": 9.992189946148366e-06, |
|
"loss": 0.0742, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.037366548042704624, |
|
"grad_norm": 1.1725842280498482, |
|
"learning_rate": 9.991389645465786e-06, |
|
"loss": 0.0621, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03914590747330961, |
|
"grad_norm": 1.8731783584771908, |
|
"learning_rate": 9.990550351633784e-06, |
|
"loss": 0.0944, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04092526690391459, |
|
"grad_norm": 1.5397480973720057, |
|
"learning_rate": 9.989672071208993e-06, |
|
"loss": 0.0833, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.042704626334519574, |
|
"grad_norm": 1.8635178832963464, |
|
"learning_rate": 9.988754811052616e-06, |
|
"loss": 0.0944, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.04448398576512456, |
|
"grad_norm": 1.8387523570753084, |
|
"learning_rate": 9.987798578330365e-06, |
|
"loss": 0.0888, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.046263345195729534, |
|
"grad_norm": 1.7378513245047862, |
|
"learning_rate": 9.986803380512406e-06, |
|
"loss": 0.0907, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04804270462633452, |
|
"grad_norm": 2.4634757693542615, |
|
"learning_rate": 9.98576922537331e-06, |
|
"loss": 0.1164, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0498220640569395, |
|
"grad_norm": 2.095630411943287, |
|
"learning_rate": 9.984696120991979e-06, |
|
"loss": 0.0905, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.051601423487544484, |
|
"grad_norm": 1.66605315792323, |
|
"learning_rate": 9.983584075751598e-06, |
|
"loss": 0.0734, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.05338078291814947, |
|
"grad_norm": 1.5880597676865722, |
|
"learning_rate": 9.982433098339553e-06, |
|
"loss": 0.0686, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05516014234875445, |
|
"grad_norm": 2.9616227607812404, |
|
"learning_rate": 9.981243197747375e-06, |
|
"loss": 0.1318, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.05693950177935943, |
|
"grad_norm": 2.618036580496317, |
|
"learning_rate": 9.980014383270668e-06, |
|
"loss": 0.115, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.05871886120996441, |
|
"grad_norm": 1.7254129893319312, |
|
"learning_rate": 9.978746664509032e-06, |
|
"loss": 0.0848, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.060498220640569395, |
|
"grad_norm": 1.780599598524534, |
|
"learning_rate": 9.97744005136599e-06, |
|
"loss": 0.0832, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.06227758007117438, |
|
"grad_norm": 1.7631456022296241, |
|
"learning_rate": 9.976094554048912e-06, |
|
"loss": 0.0826, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06405693950177936, |
|
"grad_norm": 1.8501009544110492, |
|
"learning_rate": 9.974710183068935e-06, |
|
"loss": 0.0849, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.06583629893238434, |
|
"grad_norm": 1.661923790973167, |
|
"learning_rate": 9.97328694924088e-06, |
|
"loss": 0.0916, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.06761565836298933, |
|
"grad_norm": 1.8729161133745074, |
|
"learning_rate": 9.971824863683168e-06, |
|
"loss": 0.0934, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0693950177935943, |
|
"grad_norm": 1.8841256074253598, |
|
"learning_rate": 9.970323937817732e-06, |
|
"loss": 0.0899, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0711743772241993, |
|
"grad_norm": 1.7635206710576319, |
|
"learning_rate": 9.968784183369929e-06, |
|
"loss": 0.0818, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07295373665480427, |
|
"grad_norm": 1.7985257450620409, |
|
"learning_rate": 9.96720561236845e-06, |
|
"loss": 0.0878, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.07473309608540925, |
|
"grad_norm": 2.3785200560216966, |
|
"learning_rate": 9.965588237145219e-06, |
|
"loss": 0.1488, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.07651245551601424, |
|
"grad_norm": 1.6882877139190267, |
|
"learning_rate": 9.963932070335307e-06, |
|
"loss": 0.1094, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.07829181494661921, |
|
"grad_norm": 1.896655169309608, |
|
"learning_rate": 9.962237124876828e-06, |
|
"loss": 0.1131, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0800711743772242, |
|
"grad_norm": 1.7345209950465361, |
|
"learning_rate": 9.960503414010833e-06, |
|
"loss": 0.0995, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.08185053380782918, |
|
"grad_norm": 1.6164305039760034, |
|
"learning_rate": 9.958730951281218e-06, |
|
"loss": 0.0864, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.08362989323843416, |
|
"grad_norm": 1.929674899770172, |
|
"learning_rate": 9.956919750534607e-06, |
|
"loss": 0.0975, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.08540925266903915, |
|
"grad_norm": 2.1490784905047295, |
|
"learning_rate": 9.955069825920249e-06, |
|
"loss": 0.1161, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.08718861209964412, |
|
"grad_norm": 1.8969744452241513, |
|
"learning_rate": 9.953181191889913e-06, |
|
"loss": 0.113, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.08896797153024912, |
|
"grad_norm": 1.5005925544103276, |
|
"learning_rate": 9.95125386319776e-06, |
|
"loss": 0.084, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09074733096085409, |
|
"grad_norm": 1.9164817809179493, |
|
"learning_rate": 9.949287854900243e-06, |
|
"loss": 0.1075, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.09252669039145907, |
|
"grad_norm": 2.0328967625440453, |
|
"learning_rate": 9.947283182355982e-06, |
|
"loss": 0.0969, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.09430604982206406, |
|
"grad_norm": 1.5820220658346564, |
|
"learning_rate": 9.945239861225644e-06, |
|
"loss": 0.089, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.09608540925266904, |
|
"grad_norm": 2.0204847447459557, |
|
"learning_rate": 9.943157907471825e-06, |
|
"loss": 0.0951, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.09786476868327403, |
|
"grad_norm": 1.541036281246953, |
|
"learning_rate": 9.941037337358918e-06, |
|
"loss": 0.077, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.099644128113879, |
|
"grad_norm": 2.0522136047386215, |
|
"learning_rate": 9.938878167452991e-06, |
|
"loss": 0.1095, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.10142348754448399, |
|
"grad_norm": 2.6195655053336657, |
|
"learning_rate": 9.936680414621663e-06, |
|
"loss": 0.1137, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.10320284697508897, |
|
"grad_norm": 1.6644231463393615, |
|
"learning_rate": 9.934444096033958e-06, |
|
"loss": 0.0867, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.10498220640569395, |
|
"grad_norm": 1.870807436689327, |
|
"learning_rate": 9.932169229160183e-06, |
|
"loss": 0.0953, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.10676156583629894, |
|
"grad_norm": 1.7981555703185066, |
|
"learning_rate": 9.929855831771787e-06, |
|
"loss": 0.0878, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10854092526690391, |
|
"grad_norm": 1.9275475373347888, |
|
"learning_rate": 9.927503921941218e-06, |
|
"loss": 0.1125, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.1103202846975089, |
|
"grad_norm": 1.777555635548715, |
|
"learning_rate": 9.925113518041796e-06, |
|
"loss": 0.1088, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.11209964412811388, |
|
"grad_norm": 1.7944496493752577, |
|
"learning_rate": 9.922684638747551e-06, |
|
"loss": 0.1018, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.11387900355871886, |
|
"grad_norm": 1.8488928409842156, |
|
"learning_rate": 9.920217303033091e-06, |
|
"loss": 0.1144, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.11565836298932385, |
|
"grad_norm": 1.726681171924083, |
|
"learning_rate": 9.917711530173444e-06, |
|
"loss": 0.1091, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.11743772241992882, |
|
"grad_norm": 2.1196027219561806, |
|
"learning_rate": 9.91516733974392e-06, |
|
"loss": 0.1041, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.11921708185053381, |
|
"grad_norm": 1.518644945800736, |
|
"learning_rate": 9.912584751619943e-06, |
|
"loss": 0.0869, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.12099644128113879, |
|
"grad_norm": 1.4679256158377554, |
|
"learning_rate": 9.909963785976902e-06, |
|
"loss": 0.0941, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.12277580071174377, |
|
"grad_norm": 1.5382536320532547, |
|
"learning_rate": 9.907304463290004e-06, |
|
"loss": 0.088, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.12455516014234876, |
|
"grad_norm": 1.4742652540743844, |
|
"learning_rate": 9.904606804334094e-06, |
|
"loss": 0.0889, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12633451957295375, |
|
"grad_norm": 1.591203280255007, |
|
"learning_rate": 9.901870830183506e-06, |
|
"loss": 0.1052, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.12811387900355872, |
|
"grad_norm": 1.434951772829839, |
|
"learning_rate": 9.899096562211902e-06, |
|
"loss": 0.084, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1298932384341637, |
|
"grad_norm": 1.817013462385801, |
|
"learning_rate": 9.896284022092088e-06, |
|
"loss": 0.1085, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.13167259786476868, |
|
"grad_norm": 2.089810370712842, |
|
"learning_rate": 9.893433231795864e-06, |
|
"loss": 0.1358, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.13345195729537365, |
|
"grad_norm": 2.1016895306135295, |
|
"learning_rate": 9.890544213593838e-06, |
|
"loss": 0.129, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.13523131672597866, |
|
"grad_norm": 1.7004389338060877, |
|
"learning_rate": 9.887616990055262e-06, |
|
"loss": 0.1241, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.13701067615658363, |
|
"grad_norm": 2.0444063822725425, |
|
"learning_rate": 9.884651584047845e-06, |
|
"loss": 0.1197, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1387900355871886, |
|
"grad_norm": 1.9727286190481665, |
|
"learning_rate": 9.881648018737587e-06, |
|
"loss": 0.1244, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.14056939501779359, |
|
"grad_norm": 1.6882157540173706, |
|
"learning_rate": 9.878606317588588e-06, |
|
"loss": 0.0931, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.1423487544483986, |
|
"grad_norm": 2.3908129529327065, |
|
"learning_rate": 9.875526504362868e-06, |
|
"loss": 0.1302, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.14412811387900357, |
|
"grad_norm": 2.1567643980341864, |
|
"learning_rate": 9.872408603120187e-06, |
|
"loss": 0.1191, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.14590747330960854, |
|
"grad_norm": 1.4491861727066735, |
|
"learning_rate": 9.869252638217846e-06, |
|
"loss": 0.0845, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.14768683274021352, |
|
"grad_norm": 1.356527237792737, |
|
"learning_rate": 9.866058634310503e-06, |
|
"loss": 0.0831, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.1494661921708185, |
|
"grad_norm": 1.627408408342518, |
|
"learning_rate": 9.862826616349981e-06, |
|
"loss": 0.1016, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1512455516014235, |
|
"grad_norm": 2.256869482840095, |
|
"learning_rate": 9.859556609585075e-06, |
|
"loss": 0.1047, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.15302491103202848, |
|
"grad_norm": 1.4595352749772679, |
|
"learning_rate": 9.856248639561346e-06, |
|
"loss": 0.0895, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.15480427046263345, |
|
"grad_norm": 1.6846755510472051, |
|
"learning_rate": 9.85290273212093e-06, |
|
"loss": 0.0996, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.15658362989323843, |
|
"grad_norm": 1.9935849884447174, |
|
"learning_rate": 9.849518913402334e-06, |
|
"loss": 0.1252, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.1583629893238434, |
|
"grad_norm": 1.637500492620635, |
|
"learning_rate": 9.84609720984023e-06, |
|
"loss": 0.0967, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.1601423487544484, |
|
"grad_norm": 1.822962127869925, |
|
"learning_rate": 9.84263764816525e-06, |
|
"loss": 0.0999, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1619217081850534, |
|
"grad_norm": 1.4656469828897758, |
|
"learning_rate": 9.839140255403776e-06, |
|
"loss": 0.0953, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.16370106761565836, |
|
"grad_norm": 1.4787564811598093, |
|
"learning_rate": 9.83560505887773e-06, |
|
"loss": 0.0923, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.16548042704626334, |
|
"grad_norm": 1.5408473490694214, |
|
"learning_rate": 9.83203208620436e-06, |
|
"loss": 0.105, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.16725978647686832, |
|
"grad_norm": 2.0697232314409963, |
|
"learning_rate": 9.828421365296023e-06, |
|
"loss": 0.1147, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.16903914590747332, |
|
"grad_norm": 2.0748371027980808, |
|
"learning_rate": 9.824772924359974e-06, |
|
"loss": 0.1326, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.1708185053380783, |
|
"grad_norm": 1.5463154398484957, |
|
"learning_rate": 9.821086791898133e-06, |
|
"loss": 0.1019, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.17259786476868327, |
|
"grad_norm": 1.7512693043434266, |
|
"learning_rate": 9.817362996706872e-06, |
|
"loss": 0.1289, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.17437722419928825, |
|
"grad_norm": 1.7497743137770054, |
|
"learning_rate": 9.81360156787679e-06, |
|
"loss": 0.1112, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.17615658362989323, |
|
"grad_norm": 1.3740899937015096, |
|
"learning_rate": 9.809802534792477e-06, |
|
"loss": 0.0913, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.17793594306049823, |
|
"grad_norm": 1.5824790170275451, |
|
"learning_rate": 9.805965927132294e-06, |
|
"loss": 0.0991, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1797153024911032, |
|
"grad_norm": 1.6268679057916984, |
|
"learning_rate": 9.802091774868143e-06, |
|
"loss": 0.0985, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.18149466192170818, |
|
"grad_norm": 1.4070923831156836, |
|
"learning_rate": 9.798180108265218e-06, |
|
"loss": 0.0998, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.18327402135231316, |
|
"grad_norm": 1.385496055395342, |
|
"learning_rate": 9.794230957881785e-06, |
|
"loss": 0.0853, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.18505338078291814, |
|
"grad_norm": 1.7274857555484675, |
|
"learning_rate": 9.79024435456893e-06, |
|
"loss": 0.114, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.18683274021352314, |
|
"grad_norm": 1.704795289585538, |
|
"learning_rate": 9.786220329470334e-06, |
|
"loss": 0.0976, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.18861209964412812, |
|
"grad_norm": 1.711772429112465, |
|
"learning_rate": 9.782158914022011e-06, |
|
"loss": 0.1015, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.1903914590747331, |
|
"grad_norm": 1.3244359520860687, |
|
"learning_rate": 9.778060139952075e-06, |
|
"loss": 0.0864, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.19217081850533807, |
|
"grad_norm": 1.86568176652722, |
|
"learning_rate": 9.773924039280488e-06, |
|
"loss": 0.1083, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.19395017793594305, |
|
"grad_norm": 1.4897448323263112, |
|
"learning_rate": 9.769750644318814e-06, |
|
"loss": 0.1025, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.19572953736654805, |
|
"grad_norm": 1.902563493208754, |
|
"learning_rate": 9.765539987669956e-06, |
|
"loss": 0.1149, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.19750889679715303, |
|
"grad_norm": 1.8787658210147185, |
|
"learning_rate": 9.761292102227917e-06, |
|
"loss": 0.1303, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.199288256227758, |
|
"grad_norm": 2.2267424829065945, |
|
"learning_rate": 9.757007021177529e-06, |
|
"loss": 0.1143, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.20106761565836298, |
|
"grad_norm": 1.1993867265308422, |
|
"learning_rate": 9.752684777994197e-06, |
|
"loss": 0.0771, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.20284697508896798, |
|
"grad_norm": 1.820398681246872, |
|
"learning_rate": 9.748325406443647e-06, |
|
"loss": 0.1102, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.20462633451957296, |
|
"grad_norm": 1.8241314343833193, |
|
"learning_rate": 9.743928940581646e-06, |
|
"loss": 0.121, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.20640569395017794, |
|
"grad_norm": 1.6715128177445249, |
|
"learning_rate": 9.739495414753754e-06, |
|
"loss": 0.113, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.20818505338078291, |
|
"grad_norm": 1.7720542621428181, |
|
"learning_rate": 9.73502486359504e-06, |
|
"loss": 0.1078, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.2099644128113879, |
|
"grad_norm": 1.6761469698086036, |
|
"learning_rate": 9.73051732202982e-06, |
|
"loss": 0.1045, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.2117437722419929, |
|
"grad_norm": 1.3475020270008593, |
|
"learning_rate": 9.725972825271381e-06, |
|
"loss": 0.0877, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.21352313167259787, |
|
"grad_norm": 2.0013761133499783, |
|
"learning_rate": 9.721391408821713e-06, |
|
"loss": 0.0981, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21530249110320285, |
|
"grad_norm": 1.8184013019078094, |
|
"learning_rate": 9.716773108471213e-06, |
|
"loss": 0.1149, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.21708185053380782, |
|
"grad_norm": 1.7842262137509288, |
|
"learning_rate": 9.712117960298433e-06, |
|
"loss": 0.1092, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.2188612099644128, |
|
"grad_norm": 1.7721172642097713, |
|
"learning_rate": 9.707426000669773e-06, |
|
"loss": 0.115, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.2206405693950178, |
|
"grad_norm": 1.8043650157562519, |
|
"learning_rate": 9.702697266239211e-06, |
|
"loss": 0.1248, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.22241992882562278, |
|
"grad_norm": 2.2164024018161297, |
|
"learning_rate": 9.697931793948012e-06, |
|
"loss": 0.1299, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.22419928825622776, |
|
"grad_norm": 2.0082820366753213, |
|
"learning_rate": 9.693129621024441e-06, |
|
"loss": 0.1042, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.22597864768683273, |
|
"grad_norm": 1.7058934473089258, |
|
"learning_rate": 9.68829078498347e-06, |
|
"loss": 0.1055, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.2277580071174377, |
|
"grad_norm": 1.9342594089888068, |
|
"learning_rate": 9.683415323626487e-06, |
|
"loss": 0.1238, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.22953736654804271, |
|
"grad_norm": 1.900626443925693, |
|
"learning_rate": 9.678503275040997e-06, |
|
"loss": 0.1215, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.2313167259786477, |
|
"grad_norm": 1.3100484507059196, |
|
"learning_rate": 9.673554677600336e-06, |
|
"loss": 0.0885, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.23309608540925267, |
|
"grad_norm": 1.9050911082842035, |
|
"learning_rate": 9.668569569963355e-06, |
|
"loss": 0.1092, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.23487544483985764, |
|
"grad_norm": 1.1382950203289317, |
|
"learning_rate": 9.663547991074129e-06, |
|
"loss": 0.0719, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.23665480427046262, |
|
"grad_norm": 1.9344154137109186, |
|
"learning_rate": 9.658489980161643e-06, |
|
"loss": 0.1221, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.23843416370106763, |
|
"grad_norm": 1.8441713470355867, |
|
"learning_rate": 9.653395576739504e-06, |
|
"loss": 0.1102, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.2402135231316726, |
|
"grad_norm": 1.6871408215785684, |
|
"learning_rate": 9.648264820605611e-06, |
|
"loss": 0.1094, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.24199288256227758, |
|
"grad_norm": 1.613084331253525, |
|
"learning_rate": 9.643097751841854e-06, |
|
"loss": 0.11, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.24377224199288255, |
|
"grad_norm": 1.7835765415562164, |
|
"learning_rate": 9.637894410813803e-06, |
|
"loss": 0.1103, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.24555160142348753, |
|
"grad_norm": 1.7844014462300173, |
|
"learning_rate": 9.632654838170393e-06, |
|
"loss": 0.0922, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.24733096085409254, |
|
"grad_norm": 1.6343211267024071, |
|
"learning_rate": 9.627379074843595e-06, |
|
"loss": 0.1152, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.2491103202846975, |
|
"grad_norm": 1.6286191968032797, |
|
"learning_rate": 9.622067162048111e-06, |
|
"loss": 0.1044, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2508896797153025, |
|
"grad_norm": 1.428609201484492, |
|
"learning_rate": 9.616719141281044e-06, |
|
"loss": 0.0956, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.2526690391459075, |
|
"grad_norm": 1.8122773920580002, |
|
"learning_rate": 9.611335054321576e-06, |
|
"loss": 0.1043, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.25444839857651247, |
|
"grad_norm": 1.8706140485864489, |
|
"learning_rate": 9.605914943230637e-06, |
|
"loss": 0.111, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.25622775800711745, |
|
"grad_norm": 1.442743537890464, |
|
"learning_rate": 9.600458850350588e-06, |
|
"loss": 0.1085, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.2580071174377224, |
|
"grad_norm": 1.7822521828372837, |
|
"learning_rate": 9.594966818304875e-06, |
|
"loss": 0.1203, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2597864768683274, |
|
"grad_norm": 1.6010844676959501, |
|
"learning_rate": 9.589438889997712e-06, |
|
"loss": 0.0921, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.2615658362989324, |
|
"grad_norm": 1.2139374142577783, |
|
"learning_rate": 9.583875108613727e-06, |
|
"loss": 0.0837, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.26334519572953735, |
|
"grad_norm": 1.6865361949468134, |
|
"learning_rate": 9.578275517617646e-06, |
|
"loss": 0.1095, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.26512455516014233, |
|
"grad_norm": 1.518083124629739, |
|
"learning_rate": 9.572640160753936e-06, |
|
"loss": 0.1062, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.2669039145907473, |
|
"grad_norm": 1.344740408689734, |
|
"learning_rate": 9.566969082046471e-06, |
|
"loss": 0.0879, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.26868327402135234, |
|
"grad_norm": 1.3004528648428582, |
|
"learning_rate": 9.561262325798188e-06, |
|
"loss": 0.0823, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.2704626334519573, |
|
"grad_norm": 1.5342963063283659, |
|
"learning_rate": 9.555519936590739e-06, |
|
"loss": 0.0899, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.2722419928825623, |
|
"grad_norm": 1.3971325542384678, |
|
"learning_rate": 9.549741959284147e-06, |
|
"loss": 0.0833, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.27402135231316727, |
|
"grad_norm": 1.498005550419629, |
|
"learning_rate": 9.543928439016445e-06, |
|
"loss": 0.09, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.27580071174377224, |
|
"grad_norm": 1.4361742798213601, |
|
"learning_rate": 9.538079421203339e-06, |
|
"loss": 0.0856, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.2775800711743772, |
|
"grad_norm": 3.001280515192375, |
|
"learning_rate": 9.532194951537838e-06, |
|
"loss": 0.1072, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.2793594306049822, |
|
"grad_norm": 1.8141778956636307, |
|
"learning_rate": 9.52627507598991e-06, |
|
"loss": 0.123, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.28113879003558717, |
|
"grad_norm": 1.1729368637545088, |
|
"learning_rate": 9.52031984080611e-06, |
|
"loss": 0.0784, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.28291814946619215, |
|
"grad_norm": 2.305498707365601, |
|
"learning_rate": 9.514329292509227e-06, |
|
"loss": 0.0913, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.2846975088967972, |
|
"grad_norm": 1.9655877985925596, |
|
"learning_rate": 9.508303477897925e-06, |
|
"loss": 0.1169, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.28647686832740216, |
|
"grad_norm": 1.6957961118465965, |
|
"learning_rate": 9.502242444046365e-06, |
|
"loss": 0.0916, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.28825622775800713, |
|
"grad_norm": 1.6445616263637222, |
|
"learning_rate": 9.496146238303846e-06, |
|
"loss": 0.097, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.2900355871886121, |
|
"grad_norm": 1.683223300543312, |
|
"learning_rate": 9.49001490829443e-06, |
|
"loss": 0.1068, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.2918149466192171, |
|
"grad_norm": 2.635567260135097, |
|
"learning_rate": 9.483848501916578e-06, |
|
"loss": 0.1604, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.29359430604982206, |
|
"grad_norm": 1.8459408238859796, |
|
"learning_rate": 9.477647067342766e-06, |
|
"loss": 0.1238, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.29537366548042704, |
|
"grad_norm": 1.7761938833321853, |
|
"learning_rate": 9.471410653019115e-06, |
|
"loss": 0.1136, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.297153024911032, |
|
"grad_norm": 1.7966934848593425, |
|
"learning_rate": 9.46513930766501e-06, |
|
"loss": 0.1113, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.298932384341637, |
|
"grad_norm": 1.5736190500280933, |
|
"learning_rate": 9.458833080272723e-06, |
|
"loss": 0.108, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.30071174377224197, |
|
"grad_norm": 1.8276533290781536, |
|
"learning_rate": 9.45249202010702e-06, |
|
"loss": 0.1169, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.302491103202847, |
|
"grad_norm": 1.7403472556239872, |
|
"learning_rate": 9.446116176704791e-06, |
|
"loss": 0.1209, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.304270462633452, |
|
"grad_norm": 1.4423139066464603, |
|
"learning_rate": 9.439705599874653e-06, |
|
"loss": 0.0992, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.30604982206405695, |
|
"grad_norm": 1.9684145545094471, |
|
"learning_rate": 9.433260339696564e-06, |
|
"loss": 0.1448, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.30782918149466193, |
|
"grad_norm": 1.8032595011131323, |
|
"learning_rate": 9.426780446521429e-06, |
|
"loss": 0.1113, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.3096085409252669, |
|
"grad_norm": 2.090747474330226, |
|
"learning_rate": 9.42026597097071e-06, |
|
"loss": 0.1343, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.3113879003558719, |
|
"grad_norm": 1.3376837373805128, |
|
"learning_rate": 9.413716963936033e-06, |
|
"loss": 0.0896, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.31316725978647686, |
|
"grad_norm": 1.310185430863544, |
|
"learning_rate": 9.407133476578778e-06, |
|
"loss": 0.0957, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.31494661921708184, |
|
"grad_norm": 2.360181918459286, |
|
"learning_rate": 9.400515560329698e-06, |
|
"loss": 0.1584, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.3167259786476868, |
|
"grad_norm": 1.5426233587891112, |
|
"learning_rate": 9.393863266888501e-06, |
|
"loss": 0.0902, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.3185053380782918, |
|
"grad_norm": 1.9813009275723696, |
|
"learning_rate": 9.387176648223457e-06, |
|
"loss": 0.1277, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.3202846975088968, |
|
"grad_norm": 1.5489872044528508, |
|
"learning_rate": 9.38045575657098e-06, |
|
"loss": 0.0979, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3220640569395018, |
|
"grad_norm": 1.8098316556598182, |
|
"learning_rate": 9.37370064443524e-06, |
|
"loss": 0.1227, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.3238434163701068, |
|
"grad_norm": 1.3171681457389028, |
|
"learning_rate": 9.366911364587726e-06, |
|
"loss": 0.089, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.32562277580071175, |
|
"grad_norm": 1.57797318826547, |
|
"learning_rate": 9.360087970066854e-06, |
|
"loss": 0.1075, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.3274021352313167, |
|
"grad_norm": 1.3113854461093715, |
|
"learning_rate": 9.353230514177553e-06, |
|
"loss": 0.0825, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.3291814946619217, |
|
"grad_norm": 1.3518269132199556, |
|
"learning_rate": 9.346339050490832e-06, |
|
"loss": 0.0943, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3309608540925267, |
|
"grad_norm": 1.7928659448605928, |
|
"learning_rate": 9.33941363284338e-06, |
|
"loss": 0.1181, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.33274021352313166, |
|
"grad_norm": 1.3959990014671488, |
|
"learning_rate": 9.332454315337129e-06, |
|
"loss": 0.1023, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.33451957295373663, |
|
"grad_norm": 1.5930800205084747, |
|
"learning_rate": 9.325461152338846e-06, |
|
"loss": 0.1056, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.33629893238434166, |
|
"grad_norm": 1.8361121242337763, |
|
"learning_rate": 9.3184341984797e-06, |
|
"loss": 0.1238, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.33807829181494664, |
|
"grad_norm": 1.4951511218126496, |
|
"learning_rate": 9.311373508654838e-06, |
|
"loss": 0.0977, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3398576512455516, |
|
"grad_norm": 1.9205225110251405, |
|
"learning_rate": 9.30427913802295e-06, |
|
"loss": 0.1227, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.3416370106761566, |
|
"grad_norm": 1.9319891629723007, |
|
"learning_rate": 9.297151142005852e-06, |
|
"loss": 0.1293, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.34341637010676157, |
|
"grad_norm": 1.5272380818478601, |
|
"learning_rate": 9.289989576288035e-06, |
|
"loss": 0.1001, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.34519572953736655, |
|
"grad_norm": 1.7685492371266802, |
|
"learning_rate": 9.282794496816244e-06, |
|
"loss": 0.1109, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.3469750889679715, |
|
"grad_norm": 1.6045353382355851, |
|
"learning_rate": 9.27556595979904e-06, |
|
"loss": 0.1111, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3487544483985765, |
|
"grad_norm": 1.8629080319446891, |
|
"learning_rate": 9.26830402170635e-06, |
|
"loss": 0.1235, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.3505338078291815, |
|
"grad_norm": 2.0084442890646943, |
|
"learning_rate": 9.261008739269035e-06, |
|
"loss": 0.116, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.35231316725978645, |
|
"grad_norm": 1.3565065077728236, |
|
"learning_rate": 9.253680169478448e-06, |
|
"loss": 0.0952, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.3540925266903915, |
|
"grad_norm": 1.4775056512316014, |
|
"learning_rate": 9.246318369585983e-06, |
|
"loss": 0.1042, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.35587188612099646, |
|
"grad_norm": 1.759399525754178, |
|
"learning_rate": 9.238923397102629e-06, |
|
"loss": 0.1184, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35587188612099646, |
|
"eval_loss": 0.11225133389234543, |
|
"eval_runtime": 7.1112, |
|
"eval_samples_per_second": 6.469, |
|
"eval_steps_per_second": 1.687, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35765124555160144, |
|
"grad_norm": 1.7640038826695925, |
|
"learning_rate": 9.231495309798525e-06, |
|
"loss": 0.122, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.3594306049822064, |
|
"grad_norm": 1.5942764281206059, |
|
"learning_rate": 9.224034165702506e-06, |
|
"loss": 0.1022, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.3612099644128114, |
|
"grad_norm": 1.3161357398010278, |
|
"learning_rate": 9.216540023101646e-06, |
|
"loss": 0.0885, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.36298932384341637, |
|
"grad_norm": 1.7727816383441848, |
|
"learning_rate": 9.209012940540806e-06, |
|
"loss": 0.1203, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.36476868327402134, |
|
"grad_norm": 1.6354129449463897, |
|
"learning_rate": 9.20145297682218e-06, |
|
"loss": 0.1197, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.3665480427046263, |
|
"grad_norm": 2.0410099530090244, |
|
"learning_rate": 9.193860191004833e-06, |
|
"loss": 0.1481, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.3683274021352313, |
|
"grad_norm": 1.9239729528303056, |
|
"learning_rate": 9.186234642404234e-06, |
|
"loss": 0.1342, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.3701067615658363, |
|
"grad_norm": 1.7862109138335869, |
|
"learning_rate": 9.178576390591803e-06, |
|
"loss": 0.1104, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.3718861209964413, |
|
"grad_norm": 1.506681090449808, |
|
"learning_rate": 9.170885495394435e-06, |
|
"loss": 0.0973, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.3736654804270463, |
|
"grad_norm": 1.7799885883172202, |
|
"learning_rate": 9.16316201689404e-06, |
|
"loss": 0.1135, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.37544483985765126, |
|
"grad_norm": 1.9669659800008326, |
|
"learning_rate": 9.155406015427076e-06, |
|
"loss": 0.1363, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.37722419928825623, |
|
"grad_norm": 2.0719776760650825, |
|
"learning_rate": 9.147617551584066e-06, |
|
"loss": 0.1186, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.3790035587188612, |
|
"grad_norm": 1.9491198544331112, |
|
"learning_rate": 9.139796686209135e-06, |
|
"loss": 0.1142, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.3807829181494662, |
|
"grad_norm": 1.430233303477761, |
|
"learning_rate": 9.131943480399531e-06, |
|
"loss": 0.1014, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.38256227758007116, |
|
"grad_norm": 1.3277622509599765, |
|
"learning_rate": 9.124057995505148e-06, |
|
"loss": 0.1035, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.38434163701067614, |
|
"grad_norm": 2.549331858810126, |
|
"learning_rate": 9.11614029312805e-06, |
|
"loss": 0.1817, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.3861209964412811, |
|
"grad_norm": 1.9735108003053659, |
|
"learning_rate": 9.108190435121982e-06, |
|
"loss": 0.129, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.3879003558718861, |
|
"grad_norm": 1.719033638003505, |
|
"learning_rate": 9.100208483591892e-06, |
|
"loss": 0.1157, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.3896797153024911, |
|
"grad_norm": 2.156356308415069, |
|
"learning_rate": 9.092194500893448e-06, |
|
"loss": 0.1618, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.3914590747330961, |
|
"grad_norm": 1.4683627517533993, |
|
"learning_rate": 9.084148549632547e-06, |
|
"loss": 0.1121, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3932384341637011, |
|
"grad_norm": 1.7472816434732417, |
|
"learning_rate": 9.076070692664827e-06, |
|
"loss": 0.1252, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.39501779359430605, |
|
"grad_norm": 1.7506440571167625, |
|
"learning_rate": 9.067960993095176e-06, |
|
"loss": 0.1033, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.39679715302491103, |
|
"grad_norm": 1.9007908493009855, |
|
"learning_rate": 9.059819514277238e-06, |
|
"loss": 0.1293, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.398576512455516, |
|
"grad_norm": 1.4974776819482392, |
|
"learning_rate": 9.05164631981292e-06, |
|
"loss": 0.099, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.400355871886121, |
|
"grad_norm": 1.4686130683342167, |
|
"learning_rate": 9.043441473551893e-06, |
|
"loss": 0.1048, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.40213523131672596, |
|
"grad_norm": 1.664368814342642, |
|
"learning_rate": 9.035205039591099e-06, |
|
"loss": 0.1149, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.40391459074733094, |
|
"grad_norm": 2.250123930613801, |
|
"learning_rate": 9.02693708227424e-06, |
|
"loss": 0.1554, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.40569395017793597, |
|
"grad_norm": 1.5629270450122852, |
|
"learning_rate": 9.018637666191284e-06, |
|
"loss": 0.1093, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.40747330960854095, |
|
"grad_norm": 1.6208103385192705, |
|
"learning_rate": 9.010306856177958e-06, |
|
"loss": 0.1081, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.4092526690391459, |
|
"grad_norm": 1.472766715906046, |
|
"learning_rate": 9.001944717315236e-06, |
|
"loss": 0.1103, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4110320284697509, |
|
"grad_norm": 1.6336328998230707, |
|
"learning_rate": 8.993551314928846e-06, |
|
"loss": 0.1212, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.4128113879003559, |
|
"grad_norm": 1.2203229872192514, |
|
"learning_rate": 8.985126714588739e-06, |
|
"loss": 0.0883, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.41459074733096085, |
|
"grad_norm": 1.5678936017279035, |
|
"learning_rate": 8.976670982108591e-06, |
|
"loss": 0.1262, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.41637010676156583, |
|
"grad_norm": 1.775095124134494, |
|
"learning_rate": 8.968184183545285e-06, |
|
"loss": 0.1232, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.4181494661921708, |
|
"grad_norm": 1.7233839407432066, |
|
"learning_rate": 8.959666385198396e-06, |
|
"loss": 0.1251, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.4199288256227758, |
|
"grad_norm": 1.434049457062644, |
|
"learning_rate": 8.951117653609666e-06, |
|
"loss": 0.0977, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.42170818505338076, |
|
"grad_norm": 1.7742510579509623, |
|
"learning_rate": 8.9425380555625e-06, |
|
"loss": 0.1339, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.4234875444839858, |
|
"grad_norm": 1.2340213895127414, |
|
"learning_rate": 8.933927658081423e-06, |
|
"loss": 0.0914, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.42526690391459077, |
|
"grad_norm": 1.5229666079525772, |
|
"learning_rate": 8.925286528431578e-06, |
|
"loss": 0.1127, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.42704626334519574, |
|
"grad_norm": 1.6324738371529621, |
|
"learning_rate": 8.916614734118184e-06, |
|
"loss": 0.1084, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4288256227758007, |
|
"grad_norm": 1.284310540061983, |
|
"learning_rate": 8.907912342886016e-06, |
|
"loss": 0.0863, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.4306049822064057, |
|
"grad_norm": 1.793709572509322, |
|
"learning_rate": 8.899179422718877e-06, |
|
"loss": 0.1187, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.43238434163701067, |
|
"grad_norm": 1.6081962313380027, |
|
"learning_rate": 8.890416041839061e-06, |
|
"loss": 0.1063, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.43416370106761565, |
|
"grad_norm": 1.359817840919398, |
|
"learning_rate": 8.881622268706825e-06, |
|
"loss": 0.0948, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.4359430604982206, |
|
"grad_norm": 1.5986909694805091, |
|
"learning_rate": 8.872798172019856e-06, |
|
"loss": 0.1072, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.4377224199288256, |
|
"grad_norm": 2.0881597153718863, |
|
"learning_rate": 8.863943820712726e-06, |
|
"loss": 0.1496, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.4395017793594306, |
|
"grad_norm": 1.9873617435317776, |
|
"learning_rate": 8.855059283956363e-06, |
|
"loss": 0.1453, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.4412811387900356, |
|
"grad_norm": 1.665810196954904, |
|
"learning_rate": 8.8461446311575e-06, |
|
"loss": 0.1074, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.4430604982206406, |
|
"grad_norm": 1.4932491610239713, |
|
"learning_rate": 8.837199931958147e-06, |
|
"loss": 0.1003, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.44483985765124556, |
|
"grad_norm": 1.4435294382880621, |
|
"learning_rate": 8.828225256235035e-06, |
|
"loss": 0.0881, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.44661921708185054, |
|
"grad_norm": 1.4311679398742556, |
|
"learning_rate": 8.819220674099074e-06, |
|
"loss": 0.1063, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.4483985765124555, |
|
"grad_norm": 1.7111080123843154, |
|
"learning_rate": 8.810186255894804e-06, |
|
"loss": 0.1224, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.4501779359430605, |
|
"grad_norm": 1.706749595068717, |
|
"learning_rate": 8.801122072199848e-06, |
|
"loss": 0.1172, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.45195729537366547, |
|
"grad_norm": 1.8940258565223675, |
|
"learning_rate": 8.792028193824364e-06, |
|
"loss": 0.0946, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.45373665480427045, |
|
"grad_norm": 1.2437079715854868, |
|
"learning_rate": 8.782904691810478e-06, |
|
"loss": 0.1035, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.4555160142348754, |
|
"grad_norm": 1.3987048313416997, |
|
"learning_rate": 8.77375163743175e-06, |
|
"loss": 0.0932, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.45729537366548045, |
|
"grad_norm": 1.5091345903884046, |
|
"learning_rate": 8.764569102192593e-06, |
|
"loss": 0.1176, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.45907473309608543, |
|
"grad_norm": 1.53893901757901, |
|
"learning_rate": 8.755357157827735e-06, |
|
"loss": 0.105, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.4608540925266904, |
|
"grad_norm": 2.0349055417887976, |
|
"learning_rate": 8.746115876301651e-06, |
|
"loss": 0.1287, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.4626334519572954, |
|
"grad_norm": 1.3619807286540278, |
|
"learning_rate": 8.736845329807994e-06, |
|
"loss": 0.1027, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.46441281138790036, |
|
"grad_norm": 1.7200619210688253, |
|
"learning_rate": 8.727545590769044e-06, |
|
"loss": 0.1148, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.46619217081850534, |
|
"grad_norm": 1.29491144058853, |
|
"learning_rate": 8.718216731835131e-06, |
|
"loss": 0.097, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.4679715302491103, |
|
"grad_norm": 1.737803906152931, |
|
"learning_rate": 8.708858825884075e-06, |
|
"loss": 0.134, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.4697508896797153, |
|
"grad_norm": 1.5436109990348272, |
|
"learning_rate": 8.699471946020612e-06, |
|
"loss": 0.0995, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.47153024911032027, |
|
"grad_norm": 1.717311408572477, |
|
"learning_rate": 8.690056165575825e-06, |
|
"loss": 0.1068, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.47330960854092524, |
|
"grad_norm": 1.446999842045156, |
|
"learning_rate": 8.680611558106571e-06, |
|
"loss": 0.1076, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.4750889679715303, |
|
"grad_norm": 1.4623243573058629, |
|
"learning_rate": 8.671138197394907e-06, |
|
"loss": 0.1033, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.47686832740213525, |
|
"grad_norm": 1.1161274016670877, |
|
"learning_rate": 8.661636157447511e-06, |
|
"loss": 0.0874, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.4786476868327402, |
|
"grad_norm": 1.6653334520249783, |
|
"learning_rate": 8.652105512495106e-06, |
|
"loss": 0.1068, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.4804270462633452, |
|
"grad_norm": 1.2161458299564971, |
|
"learning_rate": 8.64254633699188e-06, |
|
"loss": 0.0868, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4822064056939502, |
|
"grad_norm": 1.6124128879378954, |
|
"learning_rate": 8.632958705614905e-06, |
|
"loss": 0.1177, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.48398576512455516, |
|
"grad_norm": 1.5815763329041501, |
|
"learning_rate": 8.623342693263549e-06, |
|
"loss": 0.0969, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.48576512455516013, |
|
"grad_norm": 1.7004624219212305, |
|
"learning_rate": 8.6136983750589e-06, |
|
"loss": 0.1154, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.4875444839857651, |
|
"grad_norm": 1.6463060526789421, |
|
"learning_rate": 8.604025826343167e-06, |
|
"loss": 0.1336, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.4893238434163701, |
|
"grad_norm": 1.8458700533832704, |
|
"learning_rate": 8.594325122679107e-06, |
|
"loss": 0.1016, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.49110320284697506, |
|
"grad_norm": 1.400241761482658, |
|
"learning_rate": 8.584596339849419e-06, |
|
"loss": 0.1006, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.4928825622775801, |
|
"grad_norm": 1.3030153679055343, |
|
"learning_rate": 8.574839553856157e-06, |
|
"loss": 0.0981, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.49466192170818507, |
|
"grad_norm": 1.9609564947921418, |
|
"learning_rate": 8.565054840920145e-06, |
|
"loss": 0.1248, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.49644128113879005, |
|
"grad_norm": 1.501625876050522, |
|
"learning_rate": 8.55524227748037e-06, |
|
"loss": 0.081, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.498220640569395, |
|
"grad_norm": 1.4307061187281254, |
|
"learning_rate": 8.545401940193392e-06, |
|
"loss": 0.0982, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.7171548680560946, |
|
"learning_rate": 8.535533905932739e-06, |
|
"loss": 0.1197, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.501779359430605, |
|
"grad_norm": 1.986655797988503, |
|
"learning_rate": 8.525638251788312e-06, |
|
"loss": 0.1211, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.50355871886121, |
|
"grad_norm": 1.5080277530405581, |
|
"learning_rate": 8.515715055065783e-06, |
|
"loss": 0.1272, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.505338078291815, |
|
"grad_norm": 1.5757309521989196, |
|
"learning_rate": 8.505764393285985e-06, |
|
"loss": 0.1213, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.5071174377224199, |
|
"grad_norm": 1.6768913107820438, |
|
"learning_rate": 8.495786344184314e-06, |
|
"loss": 0.0985, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5088967971530249, |
|
"grad_norm": 1.4677635227980657, |
|
"learning_rate": 8.485780985710113e-06, |
|
"loss": 0.1074, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.5106761565836299, |
|
"grad_norm": 1.3638790268306467, |
|
"learning_rate": 8.475748396026074e-06, |
|
"loss": 0.1048, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.5124555160142349, |
|
"grad_norm": 1.5725536274668985, |
|
"learning_rate": 8.46568865350762e-06, |
|
"loss": 0.1037, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.5142348754448398, |
|
"grad_norm": 1.7503071358742441, |
|
"learning_rate": 8.45560183674229e-06, |
|
"loss": 0.1196, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.5160142348754448, |
|
"grad_norm": 1.8694516397773064, |
|
"learning_rate": 8.445488024529133e-06, |
|
"loss": 0.1333, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5177935943060499, |
|
"grad_norm": 1.2575804080590764, |
|
"learning_rate": 8.435347295878087e-06, |
|
"loss": 0.0901, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.5195729537366548, |
|
"grad_norm": 1.609456913194723, |
|
"learning_rate": 8.425179730009368e-06, |
|
"loss": 0.0988, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.5213523131672598, |
|
"grad_norm": 1.500355825147625, |
|
"learning_rate": 8.41498540635284e-06, |
|
"loss": 0.1146, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.5231316725978647, |
|
"grad_norm": 1.5951183869068255, |
|
"learning_rate": 8.404764404547404e-06, |
|
"loss": 0.0938, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.5249110320284698, |
|
"grad_norm": 1.777045156221559, |
|
"learning_rate": 8.394516804440374e-06, |
|
"loss": 0.128, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5266903914590747, |
|
"grad_norm": 1.8629659963912617, |
|
"learning_rate": 8.384242686086848e-06, |
|
"loss": 0.1408, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.5284697508896797, |
|
"grad_norm": 1.7599012235304912, |
|
"learning_rate": 8.373942129749094e-06, |
|
"loss": 0.1242, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.5302491103202847, |
|
"grad_norm": 1.119574448314899, |
|
"learning_rate": 8.363615215895908e-06, |
|
"loss": 0.0867, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.5320284697508897, |
|
"grad_norm": 1.8240982809629374, |
|
"learning_rate": 8.353262025202e-06, |
|
"loss": 0.1081, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.5338078291814946, |
|
"grad_norm": 1.6397903194163173, |
|
"learning_rate": 8.342882638547351e-06, |
|
"loss": 0.1136, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5355871886120996, |
|
"grad_norm": 1.3141545319515016, |
|
"learning_rate": 8.332477137016587e-06, |
|
"loss": 0.0988, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.5373665480427047, |
|
"grad_norm": 1.5647745567219857, |
|
"learning_rate": 8.322045601898354e-06, |
|
"loss": 0.1037, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.5391459074733096, |
|
"grad_norm": 1.5265871832571696, |
|
"learning_rate": 8.311588114684665e-06, |
|
"loss": 0.1066, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.5409252669039146, |
|
"grad_norm": 1.6981041049144696, |
|
"learning_rate": 8.301104757070276e-06, |
|
"loss": 0.1314, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.5427046263345195, |
|
"grad_norm": 1.7299682122696625, |
|
"learning_rate": 8.290595610952045e-06, |
|
"loss": 0.0995, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.5444839857651246, |
|
"grad_norm": 1.7509904919237103, |
|
"learning_rate": 8.280060758428294e-06, |
|
"loss": 0.122, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.5462633451957295, |
|
"grad_norm": 1.2220161842156116, |
|
"learning_rate": 8.269500281798164e-06, |
|
"loss": 0.103, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.5480427046263345, |
|
"grad_norm": 1.551082922986217, |
|
"learning_rate": 8.258914263560971e-06, |
|
"loss": 0.0927, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.5498220640569395, |
|
"grad_norm": 1.8169323629139662, |
|
"learning_rate": 8.248302786415567e-06, |
|
"loss": 0.1235, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.5516014234875445, |
|
"grad_norm": 1.8194055366234647, |
|
"learning_rate": 8.237665933259693e-06, |
|
"loss": 0.1173, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5533807829181495, |
|
"grad_norm": 1.1874388195907888, |
|
"learning_rate": 8.227003787189323e-06, |
|
"loss": 0.0885, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.5551601423487544, |
|
"grad_norm": 1.2997828302914156, |
|
"learning_rate": 8.216316431498028e-06, |
|
"loss": 0.0738, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.5569395017793595, |
|
"grad_norm": 1.5840028144386842, |
|
"learning_rate": 8.205603949676317e-06, |
|
"loss": 0.1182, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.5587188612099644, |
|
"grad_norm": 1.6409321089622475, |
|
"learning_rate": 8.194866425410984e-06, |
|
"loss": 0.1064, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.5604982206405694, |
|
"grad_norm": 1.6069524977533478, |
|
"learning_rate": 8.184103942584456e-06, |
|
"loss": 0.1049, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.5622775800711743, |
|
"grad_norm": 1.2986593728214382, |
|
"learning_rate": 8.173316585274144e-06, |
|
"loss": 0.0879, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.5640569395017794, |
|
"grad_norm": 1.8774709707320876, |
|
"learning_rate": 8.162504437751775e-06, |
|
"loss": 0.1401, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.5658362989323843, |
|
"grad_norm": 1.3248052209389418, |
|
"learning_rate": 8.151667584482742e-06, |
|
"loss": 0.0901, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.5676156583629893, |
|
"grad_norm": 1.901394954720854, |
|
"learning_rate": 8.140806110125442e-06, |
|
"loss": 0.114, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.5693950177935944, |
|
"grad_norm": 1.4447365573596782, |
|
"learning_rate": 8.129920099530608e-06, |
|
"loss": 0.1116, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5711743772241993, |
|
"grad_norm": 1.4748933144387377, |
|
"learning_rate": 8.119009637740663e-06, |
|
"loss": 0.073, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.5729537366548043, |
|
"grad_norm": 1.2592177338541486, |
|
"learning_rate": 8.108074809989032e-06, |
|
"loss": 0.0937, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.5747330960854092, |
|
"grad_norm": 1.6363977371659915, |
|
"learning_rate": 8.097115701699498e-06, |
|
"loss": 0.0892, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.5765124555160143, |
|
"grad_norm": 1.6554554200520046, |
|
"learning_rate": 8.086132398485525e-06, |
|
"loss": 0.1163, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.5782918149466192, |
|
"grad_norm": 0.9857035769645125, |
|
"learning_rate": 8.075124986149583e-06, |
|
"loss": 0.0705, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.5800711743772242, |
|
"grad_norm": 1.6364307526159463, |
|
"learning_rate": 8.064093550682494e-06, |
|
"loss": 0.1029, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.5818505338078291, |
|
"grad_norm": 1.5184178420520167, |
|
"learning_rate": 8.053038178262742e-06, |
|
"loss": 0.0901, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.5836298932384342, |
|
"grad_norm": 1.6291332459881513, |
|
"learning_rate": 8.041958955255815e-06, |
|
"loss": 0.0999, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.5854092526690391, |
|
"grad_norm": 1.6964889289639662, |
|
"learning_rate": 8.030855968213518e-06, |
|
"loss": 0.1134, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.5871886120996441, |
|
"grad_norm": 1.8881602959803243, |
|
"learning_rate": 8.019729303873307e-06, |
|
"loss": 0.1262, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5889679715302492, |
|
"grad_norm": 1.177876841616444, |
|
"learning_rate": 8.008579049157607e-06, |
|
"loss": 0.0775, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.5907473309608541, |
|
"grad_norm": 1.435662347375319, |
|
"learning_rate": 7.99740529117313e-06, |
|
"loss": 0.1069, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.5925266903914591, |
|
"grad_norm": 1.4236311709000644, |
|
"learning_rate": 7.986208117210198e-06, |
|
"loss": 0.1016, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.594306049822064, |
|
"grad_norm": 1.5112086613552627, |
|
"learning_rate": 7.974987614742066e-06, |
|
"loss": 0.1014, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.5960854092526691, |
|
"grad_norm": 1.522085868837324, |
|
"learning_rate": 7.963743871424224e-06, |
|
"loss": 0.0878, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.597864768683274, |
|
"grad_norm": 1.6170089489910016, |
|
"learning_rate": 7.952476975093729e-06, |
|
"loss": 0.11, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.599644128113879, |
|
"grad_norm": 1.7518299396101669, |
|
"learning_rate": 7.941187013768508e-06, |
|
"loss": 0.1182, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.6014234875444839, |
|
"grad_norm": 1.533508796136424, |
|
"learning_rate": 7.929874075646673e-06, |
|
"loss": 0.1058, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.603202846975089, |
|
"grad_norm": 1.6038193732106218, |
|
"learning_rate": 7.918538249105835e-06, |
|
"loss": 0.1141, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.604982206405694, |
|
"grad_norm": 1.5611759147778177, |
|
"learning_rate": 7.907179622702409e-06, |
|
"loss": 0.0972, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6067615658362989, |
|
"grad_norm": 1.380702235307811, |
|
"learning_rate": 7.895798285170927e-06, |
|
"loss": 0.0984, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.608540925266904, |
|
"grad_norm": 1.451249283151842, |
|
"learning_rate": 7.88439432542334e-06, |
|
"loss": 0.0912, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.6103202846975089, |
|
"grad_norm": 1.419332324207685, |
|
"learning_rate": 7.872967832548327e-06, |
|
"loss": 0.0951, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.6120996441281139, |
|
"grad_norm": 1.6412257802681618, |
|
"learning_rate": 7.861518895810597e-06, |
|
"loss": 0.1063, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.6138790035587188, |
|
"grad_norm": 1.5180403396344606, |
|
"learning_rate": 7.850047604650188e-06, |
|
"loss": 0.1198, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6156583629893239, |
|
"grad_norm": 1.5844214292212888, |
|
"learning_rate": 7.838554048681783e-06, |
|
"loss": 0.1106, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.6174377224199288, |
|
"grad_norm": 1.9430538727958824, |
|
"learning_rate": 7.827038317693988e-06, |
|
"loss": 0.1374, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.6192170818505338, |
|
"grad_norm": 1.340126032516179, |
|
"learning_rate": 7.815500501648654e-06, |
|
"loss": 0.0955, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.6209964412811388, |
|
"grad_norm": 1.7519177931976833, |
|
"learning_rate": 7.80394069068015e-06, |
|
"loss": 0.1168, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.6227758007117438, |
|
"grad_norm": 1.6229564889469241, |
|
"learning_rate": 7.79235897509468e-06, |
|
"loss": 0.1129, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6245551601423488, |
|
"grad_norm": 1.7343981949883889, |
|
"learning_rate": 7.780755445369563e-06, |
|
"loss": 0.1294, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.6263345195729537, |
|
"grad_norm": 2.080341737325839, |
|
"learning_rate": 7.769130192152538e-06, |
|
"loss": 0.1592, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.6281138790035588, |
|
"grad_norm": 1.5857520047908384, |
|
"learning_rate": 7.757483306261042e-06, |
|
"loss": 0.108, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.6298932384341637, |
|
"grad_norm": 1.7758875579921354, |
|
"learning_rate": 7.745814878681516e-06, |
|
"loss": 0.1101, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.6316725978647687, |
|
"grad_norm": 1.3422744579087493, |
|
"learning_rate": 7.734125000568684e-06, |
|
"loss": 0.0899, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.6334519572953736, |
|
"grad_norm": 1.4642960359063446, |
|
"learning_rate": 7.722413763244837e-06, |
|
"loss": 0.1131, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.6352313167259787, |
|
"grad_norm": 1.0906709123670035, |
|
"learning_rate": 7.710681258199136e-06, |
|
"loss": 0.0785, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.6370106761565836, |
|
"grad_norm": 1.4388579019950174, |
|
"learning_rate": 7.69892757708688e-06, |
|
"loss": 0.0967, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.6387900355871886, |
|
"grad_norm": 1.5708028369833191, |
|
"learning_rate": 7.687152811728799e-06, |
|
"loss": 0.1094, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.6405693950177936, |
|
"grad_norm": 1.433716136755529, |
|
"learning_rate": 7.675357054110337e-06, |
|
"loss": 0.0956, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6423487544483986, |
|
"grad_norm": 1.3963348382562772, |
|
"learning_rate": 7.663540396380931e-06, |
|
"loss": 0.0942, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.6441281138790036, |
|
"grad_norm": 1.6516483121520178, |
|
"learning_rate": 7.651702930853287e-06, |
|
"loss": 0.1039, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.6459074733096085, |
|
"grad_norm": 1.6090726320026931, |
|
"learning_rate": 7.639844750002668e-06, |
|
"loss": 0.1101, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.6476868327402135, |
|
"grad_norm": 1.608953799588776, |
|
"learning_rate": 7.627965946466167e-06, |
|
"loss": 0.1136, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.6494661921708185, |
|
"grad_norm": 1.695849666087003, |
|
"learning_rate": 7.616066613041977e-06, |
|
"loss": 0.1178, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.6512455516014235, |
|
"grad_norm": 2.078903493826317, |
|
"learning_rate": 7.6041468426886785e-06, |
|
"loss": 0.155, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.6530249110320284, |
|
"grad_norm": 1.2097069756766454, |
|
"learning_rate": 7.592206728524507e-06, |
|
"loss": 0.0804, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.6548042704626335, |
|
"grad_norm": 1.4957912828401791, |
|
"learning_rate": 7.580246363826621e-06, |
|
"loss": 0.115, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.6565836298932385, |
|
"grad_norm": 1.8795185627038717, |
|
"learning_rate": 7.568265842030381e-06, |
|
"loss": 0.146, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.6583629893238434, |
|
"grad_norm": 1.5377376618211203, |
|
"learning_rate": 7.556265256728618e-06, |
|
"loss": 0.1044, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6601423487544484, |
|
"grad_norm": 1.4799346497909849, |
|
"learning_rate": 7.544244701670894e-06, |
|
"loss": 0.098, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.6619217081850534, |
|
"grad_norm": 1.5717286730498725, |
|
"learning_rate": 7.532204270762786e-06, |
|
"loss": 0.1068, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.6637010676156584, |
|
"grad_norm": 1.5722358785148272, |
|
"learning_rate": 7.520144058065133e-06, |
|
"loss": 0.1046, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.6654804270462633, |
|
"grad_norm": 1.446496352921078, |
|
"learning_rate": 7.50806415779332e-06, |
|
"loss": 0.0928, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.6672597864768683, |
|
"grad_norm": 1.4102925649680855, |
|
"learning_rate": 7.495964664316525e-06, |
|
"loss": 0.1022, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6690391459074733, |
|
"grad_norm": 1.616794128074563, |
|
"learning_rate": 7.4838456721569975e-06, |
|
"loss": 0.1292, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.6708185053380783, |
|
"grad_norm": 1.3009634022228664, |
|
"learning_rate": 7.471707275989304e-06, |
|
"loss": 0.0929, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.6725978647686833, |
|
"grad_norm": 1.465620847884084, |
|
"learning_rate": 7.459549570639602e-06, |
|
"loss": 0.0879, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.6743772241992882, |
|
"grad_norm": 1.6038089906825124, |
|
"learning_rate": 7.447372651084896e-06, |
|
"loss": 0.1069, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.6761565836298933, |
|
"grad_norm": 1.4090259568638837, |
|
"learning_rate": 7.435176612452286e-06, |
|
"loss": 0.1109, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6779359430604982, |
|
"grad_norm": 1.353789165218491, |
|
"learning_rate": 7.4229615500182396e-06, |
|
"loss": 0.0916, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.6797153024911032, |
|
"grad_norm": 1.1317434288102732, |
|
"learning_rate": 7.4107275592078345e-06, |
|
"loss": 0.0785, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.6814946619217082, |
|
"grad_norm": 1.641050694483375, |
|
"learning_rate": 7.398474735594022e-06, |
|
"loss": 0.1045, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.6832740213523132, |
|
"grad_norm": 1.6782030985100371, |
|
"learning_rate": 7.386203174896872e-06, |
|
"loss": 0.122, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.6850533807829181, |
|
"grad_norm": 1.6622915710182833, |
|
"learning_rate": 7.373912972982838e-06, |
|
"loss": 0.0902, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6868327402135231, |
|
"grad_norm": 1.3008168517924603, |
|
"learning_rate": 7.361604225863992e-06, |
|
"loss": 0.0877, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.6886120996441281, |
|
"grad_norm": 1.7674500038091852, |
|
"learning_rate": 7.349277029697287e-06, |
|
"loss": 0.1086, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.6903914590747331, |
|
"grad_norm": 1.837388181885663, |
|
"learning_rate": 7.336931480783801e-06, |
|
"loss": 0.1316, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.6921708185053381, |
|
"grad_norm": 1.8080138264969363, |
|
"learning_rate": 7.3245676755679854e-06, |
|
"loss": 0.1099, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.693950177935943, |
|
"grad_norm": 1.500706950647534, |
|
"learning_rate": 7.312185710636911e-06, |
|
"loss": 0.099, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6957295373665481, |
|
"grad_norm": 1.3954723853788058, |
|
"learning_rate": 7.299785682719512e-06, |
|
"loss": 0.0885, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.697508896797153, |
|
"grad_norm": 1.4305276542840808, |
|
"learning_rate": 7.287367688685835e-06, |
|
"loss": 0.0985, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.699288256227758, |
|
"grad_norm": 2.0301587894122277, |
|
"learning_rate": 7.274931825546279e-06, |
|
"loss": 0.1316, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.701067615658363, |
|
"grad_norm": 1.5426044621297716, |
|
"learning_rate": 7.262478190450834e-06, |
|
"loss": 0.1134, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.702846975088968, |
|
"grad_norm": 1.7620658885459586, |
|
"learning_rate": 7.250006880688332e-06, |
|
"loss": 0.1185, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7046263345195729, |
|
"grad_norm": 2.05065488404836, |
|
"learning_rate": 7.2375179936856775e-06, |
|
"loss": 0.1238, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.7064056939501779, |
|
"grad_norm": 1.2799631890101968, |
|
"learning_rate": 7.22501162700709e-06, |
|
"loss": 0.0851, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.708185053380783, |
|
"grad_norm": 1.437127726294493, |
|
"learning_rate": 7.21248787835334e-06, |
|
"loss": 0.0847, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.7099644128113879, |
|
"grad_norm": 1.4045202665222167, |
|
"learning_rate": 7.199946845560994e-06, |
|
"loss": 0.1022, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.7117437722419929, |
|
"grad_norm": 2.0699807718496115, |
|
"learning_rate": 7.1873886266016365e-06, |
|
"loss": 0.1292, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7117437722419929, |
|
"eval_loss": 0.1060875877737999, |
|
"eval_runtime": 7.1181, |
|
"eval_samples_per_second": 6.462, |
|
"eval_steps_per_second": 1.686, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7135231316725978, |
|
"grad_norm": 1.360335438108467, |
|
"learning_rate": 7.174813319581115e-06, |
|
"loss": 0.1028, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.7153024911032029, |
|
"grad_norm": 1.8395760868501272, |
|
"learning_rate": 7.162221022738768e-06, |
|
"loss": 0.1107, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.7170818505338078, |
|
"grad_norm": 1.40072281253199, |
|
"learning_rate": 7.149611834446664e-06, |
|
"loss": 0.0987, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.7188612099644128, |
|
"grad_norm": 1.6677063043439908, |
|
"learning_rate": 7.136985853208824e-06, |
|
"loss": 0.1164, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.7206405693950177, |
|
"grad_norm": 1.4063547956304991, |
|
"learning_rate": 7.124343177660462e-06, |
|
"loss": 0.094, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.7224199288256228, |
|
"grad_norm": 2.110714854290324, |
|
"learning_rate": 7.111683906567206e-06, |
|
"loss": 0.1084, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.7241992882562278, |
|
"grad_norm": 1.6357369095417866, |
|
"learning_rate": 7.099008138824329e-06, |
|
"loss": 0.107, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.7259786476868327, |
|
"grad_norm": 1.5905661512211369, |
|
"learning_rate": 7.086315973455982e-06, |
|
"loss": 0.124, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.7277580071174378, |
|
"grad_norm": 1.3466687007611506, |
|
"learning_rate": 7.0736075096144084e-06, |
|
"loss": 0.1044, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.7295373665480427, |
|
"grad_norm": 1.3501659476664134, |
|
"learning_rate": 7.060882846579182e-06, |
|
"loss": 0.0869, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7313167259786477, |
|
"grad_norm": 1.3191630151005758, |
|
"learning_rate": 7.048142083756427e-06, |
|
"loss": 0.0948, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.7330960854092526, |
|
"grad_norm": 1.7172306950884948, |
|
"learning_rate": 7.035385320678035e-06, |
|
"loss": 0.1262, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.7348754448398577, |
|
"grad_norm": 1.777003732127494, |
|
"learning_rate": 7.022612657000898e-06, |
|
"loss": 0.12, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.7366548042704626, |
|
"grad_norm": 1.5593347551504504, |
|
"learning_rate": 7.0098241925061215e-06, |
|
"loss": 0.0975, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.7384341637010676, |
|
"grad_norm": 1.9625019828342702, |
|
"learning_rate": 6.997020027098249e-06, |
|
"loss": 0.1226, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.7402135231316725, |
|
"grad_norm": 1.530299198312262, |
|
"learning_rate": 6.9842002608044844e-06, |
|
"loss": 0.1027, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.7419928825622776, |
|
"grad_norm": 1.4336560296063166, |
|
"learning_rate": 6.971364993773901e-06, |
|
"loss": 0.0982, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.7437722419928826, |
|
"grad_norm": 1.3728796028488062, |
|
"learning_rate": 6.958514326276669e-06, |
|
"loss": 0.0785, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.7455516014234875, |
|
"grad_norm": 1.3390960023538374, |
|
"learning_rate": 6.945648358703269e-06, |
|
"loss": 0.0882, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.7473309608540926, |
|
"grad_norm": 1.585526668337918, |
|
"learning_rate": 6.932767191563703e-06, |
|
"loss": 0.1011, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7491103202846975, |
|
"grad_norm": 1.4878022643626347, |
|
"learning_rate": 6.919870925486718e-06, |
|
"loss": 0.1027, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.7508896797153025, |
|
"grad_norm": 1.5148362490072826, |
|
"learning_rate": 6.906959661219011e-06, |
|
"loss": 0.0987, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.7526690391459074, |
|
"grad_norm": 1.274729586607756, |
|
"learning_rate": 6.8940334996244505e-06, |
|
"loss": 0.0869, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.7544483985765125, |
|
"grad_norm": 1.7153995057145504, |
|
"learning_rate": 6.881092541683279e-06, |
|
"loss": 0.1103, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.7562277580071174, |
|
"grad_norm": 1.3844299585979176, |
|
"learning_rate": 6.8681368884913345e-06, |
|
"loss": 0.0928, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7580071174377224, |
|
"grad_norm": 1.620609531274265, |
|
"learning_rate": 6.855166641259252e-06, |
|
"loss": 0.1266, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.7597864768683275, |
|
"grad_norm": 1.356040705364429, |
|
"learning_rate": 6.8421819013116766e-06, |
|
"loss": 0.0848, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.7615658362989324, |
|
"grad_norm": 1.602367200808915, |
|
"learning_rate": 6.829182770086474e-06, |
|
"loss": 0.1481, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.7633451957295374, |
|
"grad_norm": 1.618709127750825, |
|
"learning_rate": 6.816169349133934e-06, |
|
"loss": 0.1084, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.7651245551601423, |
|
"grad_norm": 1.2434274068233606, |
|
"learning_rate": 6.803141740115979e-06, |
|
"loss": 0.0816, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7669039145907474, |
|
"grad_norm": 2.245145585193462, |
|
"learning_rate": 6.7901000448053676e-06, |
|
"loss": 0.124, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.7686832740213523, |
|
"grad_norm": 1.1104120817280436, |
|
"learning_rate": 6.777044365084907e-06, |
|
"loss": 0.0741, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.7704626334519573, |
|
"grad_norm": 1.413924205432467, |
|
"learning_rate": 6.763974802946649e-06, |
|
"loss": 0.0936, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.7722419928825622, |
|
"grad_norm": 1.4606857459382603, |
|
"learning_rate": 6.750891460491093e-06, |
|
"loss": 0.1064, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.7740213523131673, |
|
"grad_norm": 1.668160276106634, |
|
"learning_rate": 6.737794439926395e-06, |
|
"loss": 0.1115, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.7758007117437722, |
|
"grad_norm": 1.284057413192129, |
|
"learning_rate": 6.724683843567567e-06, |
|
"loss": 0.0908, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.7775800711743772, |
|
"grad_norm": 1.5664213345906381, |
|
"learning_rate": 6.711559773835672e-06, |
|
"loss": 0.1082, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.7793594306049823, |
|
"grad_norm": 1.6474378920259822, |
|
"learning_rate": 6.69842233325703e-06, |
|
"loss": 0.0955, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.7811387900355872, |
|
"grad_norm": 2.04262284297252, |
|
"learning_rate": 6.685271624462416e-06, |
|
"loss": 0.139, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.7829181494661922, |
|
"grad_norm": 1.3588080779487557, |
|
"learning_rate": 6.672107750186255e-06, |
|
"loss": 0.0897, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7846975088967971, |
|
"grad_norm": 1.0380723054276066, |
|
"learning_rate": 6.658930813265825e-06, |
|
"loss": 0.0695, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.7864768683274022, |
|
"grad_norm": 1.4459161975915413, |
|
"learning_rate": 6.645740916640449e-06, |
|
"loss": 0.1112, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.7882562277580071, |
|
"grad_norm": 1.3921905450127778, |
|
"learning_rate": 6.63253816335069e-06, |
|
"loss": 0.0947, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.7900355871886121, |
|
"grad_norm": 1.2336130431728567, |
|
"learning_rate": 6.619322656537552e-06, |
|
"loss": 0.0766, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.791814946619217, |
|
"grad_norm": 1.7546421217658075, |
|
"learning_rate": 6.606094499441671e-06, |
|
"loss": 0.1113, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.7935943060498221, |
|
"grad_norm": 1.5210142561111772, |
|
"learning_rate": 6.592853795402502e-06, |
|
"loss": 0.1126, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.7953736654804271, |
|
"grad_norm": 1.422872691563868, |
|
"learning_rate": 6.579600647857525e-06, |
|
"loss": 0.1017, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.797153024911032, |
|
"grad_norm": 1.763652351417412, |
|
"learning_rate": 6.566335160341425e-06, |
|
"loss": 0.1143, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.798932384341637, |
|
"grad_norm": 1.2610180759986507, |
|
"learning_rate": 6.553057436485289e-06, |
|
"loss": 0.0891, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.800711743772242, |
|
"grad_norm": 1.2774834803753243, |
|
"learning_rate": 6.539767580015799e-06, |
|
"loss": 0.082, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.802491103202847, |
|
"grad_norm": 1.8562764358699877, |
|
"learning_rate": 6.52646569475441e-06, |
|
"loss": 0.1022, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.8042704626334519, |
|
"grad_norm": 1.8281637653338807, |
|
"learning_rate": 6.513151884616556e-06, |
|
"loss": 0.1287, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.806049822064057, |
|
"grad_norm": 1.5666772387134404, |
|
"learning_rate": 6.499826253610823e-06, |
|
"loss": 0.1044, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.8078291814946619, |
|
"grad_norm": 1.4072736131882742, |
|
"learning_rate": 6.486488905838143e-06, |
|
"loss": 0.1072, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.8096085409252669, |
|
"grad_norm": 1.2889392675829212, |
|
"learning_rate": 6.473139945490984e-06, |
|
"loss": 0.0736, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.8113879003558719, |
|
"grad_norm": 1.5524870505122597, |
|
"learning_rate": 6.459779476852528e-06, |
|
"loss": 0.0942, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.8131672597864769, |
|
"grad_norm": 1.2495159127388251, |
|
"learning_rate": 6.446407604295863e-06, |
|
"loss": 0.0854, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.8149466192170819, |
|
"grad_norm": 1.40009754290789, |
|
"learning_rate": 6.433024432283169e-06, |
|
"loss": 0.0933, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.8167259786476868, |
|
"grad_norm": 1.6667339616859487, |
|
"learning_rate": 6.41963006536489e-06, |
|
"loss": 0.1058, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.8185053380782918, |
|
"grad_norm": 1.6501982680564844, |
|
"learning_rate": 6.4062246081789316e-06, |
|
"loss": 0.1077, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8202846975088968, |
|
"grad_norm": 1.0959928599524473, |
|
"learning_rate": 6.392808165449836e-06, |
|
"loss": 0.0774, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.8220640569395018, |
|
"grad_norm": 1.6111246327129911, |
|
"learning_rate": 6.379380841987965e-06, |
|
"loss": 0.1082, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.8238434163701067, |
|
"grad_norm": 1.5421824213223168, |
|
"learning_rate": 6.365942742688684e-06, |
|
"loss": 0.0955, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.8256227758007118, |
|
"grad_norm": 2.0546960634997653, |
|
"learning_rate": 6.352493972531535e-06, |
|
"loss": 0.1068, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.8274021352313167, |
|
"grad_norm": 1.7225981945488118, |
|
"learning_rate": 6.339034636579425e-06, |
|
"loss": 0.1121, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.8291814946619217, |
|
"grad_norm": 1.231245543489658, |
|
"learning_rate": 6.325564839977802e-06, |
|
"loss": 0.0803, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.8309608540925267, |
|
"grad_norm": 1.3637048047361693, |
|
"learning_rate": 6.312084687953835e-06, |
|
"loss": 0.0978, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.8327402135231317, |
|
"grad_norm": 1.9168984713326007, |
|
"learning_rate": 6.298594285815585e-06, |
|
"loss": 0.132, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.8345195729537367, |
|
"grad_norm": 1.4944072728250193, |
|
"learning_rate": 6.2850937389511936e-06, |
|
"loss": 0.1154, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.8362989323843416, |
|
"grad_norm": 1.5275922821960304, |
|
"learning_rate": 6.271583152828049e-06, |
|
"loss": 0.0999, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8380782918149466, |
|
"grad_norm": 1.7973501823017823, |
|
"learning_rate": 6.258062632991972e-06, |
|
"loss": 0.1205, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.8398576512455516, |
|
"grad_norm": 1.3409081991662586, |
|
"learning_rate": 6.244532285066382e-06, |
|
"loss": 0.0949, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.8416370106761566, |
|
"grad_norm": 1.1523180603215484, |
|
"learning_rate": 6.2309922147514775e-06, |
|
"loss": 0.0825, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.8434163701067615, |
|
"grad_norm": 1.1778263000900968, |
|
"learning_rate": 6.2174425278234115e-06, |
|
"loss": 0.084, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.8451957295373665, |
|
"grad_norm": 1.5951920910262518, |
|
"learning_rate": 6.20388333013346e-06, |
|
"loss": 0.0956, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.8469750889679716, |
|
"grad_norm": 1.5159805020988901, |
|
"learning_rate": 6.190314727607196e-06, |
|
"loss": 0.1116, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.8487544483985765, |
|
"grad_norm": 1.4969079814052297, |
|
"learning_rate": 6.176736826243671e-06, |
|
"loss": 0.1053, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.8505338078291815, |
|
"grad_norm": 1.7002479258152148, |
|
"learning_rate": 6.163149732114571e-06, |
|
"loss": 0.1349, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.8523131672597865, |
|
"grad_norm": 1.664417888310737, |
|
"learning_rate": 6.149553551363404e-06, |
|
"loss": 0.1185, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.8540925266903915, |
|
"grad_norm": 1.4702761494044079, |
|
"learning_rate": 6.1359483902046605e-06, |
|
"loss": 0.0899, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8558718861209964, |
|
"grad_norm": 1.42811250103426, |
|
"learning_rate": 6.122334354922984e-06, |
|
"loss": 0.1096, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.8576512455516014, |
|
"grad_norm": 1.370793729888614, |
|
"learning_rate": 6.108711551872347e-06, |
|
"loss": 0.0995, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.8594306049822064, |
|
"grad_norm": 1.5202898793722948, |
|
"learning_rate": 6.095080087475218e-06, |
|
"loss": 0.0985, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.8612099644128114, |
|
"grad_norm": 1.5948421067505985, |
|
"learning_rate": 6.0814400682217236e-06, |
|
"loss": 0.1052, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.8629893238434164, |
|
"grad_norm": 1.2363931834828676, |
|
"learning_rate": 6.067791600668823e-06, |
|
"loss": 0.0689, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.8647686832740213, |
|
"grad_norm": 1.1363810658701408, |
|
"learning_rate": 6.054134791439479e-06, |
|
"loss": 0.0696, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.8665480427046264, |
|
"grad_norm": 1.2532875817813287, |
|
"learning_rate": 6.040469747221815e-06, |
|
"loss": 0.0784, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.8683274021352313, |
|
"grad_norm": 1.449685373586303, |
|
"learning_rate": 6.026796574768288e-06, |
|
"loss": 0.0987, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.8701067615658363, |
|
"grad_norm": 1.4472703121865234, |
|
"learning_rate": 6.013115380894854e-06, |
|
"loss": 0.1009, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.8718861209964412, |
|
"grad_norm": 1.1781213710858633, |
|
"learning_rate": 5.999426272480133e-06, |
|
"loss": 0.0731, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8736654804270463, |
|
"grad_norm": 1.4354345327386753, |
|
"learning_rate": 5.985729356464575e-06, |
|
"loss": 0.1139, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.8754448398576512, |
|
"grad_norm": 1.2575751863323292, |
|
"learning_rate": 5.972024739849622e-06, |
|
"loss": 0.085, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.8772241992882562, |
|
"grad_norm": 1.1534964177268272, |
|
"learning_rate": 5.958312529696874e-06, |
|
"loss": 0.0851, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.8790035587188612, |
|
"grad_norm": 1.5979890607148797, |
|
"learning_rate": 5.944592833127253e-06, |
|
"loss": 0.1115, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.8807829181494662, |
|
"grad_norm": 1.3572559567620228, |
|
"learning_rate": 5.9308657573201645e-06, |
|
"loss": 0.0763, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.8825622775800712, |
|
"grad_norm": 1.4273375722764527, |
|
"learning_rate": 5.917131409512663e-06, |
|
"loss": 0.0944, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.8843416370106761, |
|
"grad_norm": 1.4457012637384647, |
|
"learning_rate": 5.903389896998611e-06, |
|
"loss": 0.0939, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.8861209964412812, |
|
"grad_norm": 1.539973112725713, |
|
"learning_rate": 5.889641327127843e-06, |
|
"loss": 0.0956, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.8879003558718861, |
|
"grad_norm": 1.666238770522618, |
|
"learning_rate": 5.875885807305326e-06, |
|
"loss": 0.1097, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.8896797153024911, |
|
"grad_norm": 1.4444784442629337, |
|
"learning_rate": 5.862123444990319e-06, |
|
"loss": 0.101, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.891459074733096, |
|
"grad_norm": 1.6177612374223207, |
|
"learning_rate": 5.848354347695537e-06, |
|
"loss": 0.1248, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.8932384341637011, |
|
"grad_norm": 1.4465179675467945, |
|
"learning_rate": 5.83457862298631e-06, |
|
"loss": 0.1008, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.895017793594306, |
|
"grad_norm": 1.9179440268025802, |
|
"learning_rate": 5.8207963784797396e-06, |
|
"loss": 0.132, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.896797153024911, |
|
"grad_norm": 1.62432885903083, |
|
"learning_rate": 5.807007721843862e-06, |
|
"loss": 0.1085, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.8985765124555161, |
|
"grad_norm": 1.5318342201640707, |
|
"learning_rate": 5.793212760796804e-06, |
|
"loss": 0.1027, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.900355871886121, |
|
"grad_norm": 1.4299994974017267, |
|
"learning_rate": 5.779411603105947e-06, |
|
"loss": 0.0925, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.902135231316726, |
|
"grad_norm": 1.2607951634572778, |
|
"learning_rate": 5.765604356587076e-06, |
|
"loss": 0.0804, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.9039145907473309, |
|
"grad_norm": 1.3317793277168055, |
|
"learning_rate": 5.751791129103545e-06, |
|
"loss": 0.0989, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.905693950177936, |
|
"grad_norm": 1.973430335312529, |
|
"learning_rate": 5.737972028565431e-06, |
|
"loss": 0.1209, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.9074733096085409, |
|
"grad_norm": 1.6708577662169801, |
|
"learning_rate": 5.7241471629286934e-06, |
|
"loss": 0.0944, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9092526690391459, |
|
"grad_norm": 1.8734945300446482, |
|
"learning_rate": 5.7103166401943276e-06, |
|
"loss": 0.1211, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.9110320284697508, |
|
"grad_norm": 1.7469322688239424, |
|
"learning_rate": 5.696480568407523e-06, |
|
"loss": 0.1172, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.9128113879003559, |
|
"grad_norm": 1.890245401298048, |
|
"learning_rate": 5.682639055656817e-06, |
|
"loss": 0.1293, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.9145907473309609, |
|
"grad_norm": 1.5388046269087141, |
|
"learning_rate": 5.668792210073255e-06, |
|
"loss": 0.0995, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.9163701067615658, |
|
"grad_norm": 1.8004688298790124, |
|
"learning_rate": 5.654940139829544e-06, |
|
"loss": 0.1257, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.9181494661921709, |
|
"grad_norm": 1.3466276310541516, |
|
"learning_rate": 5.641082953139201e-06, |
|
"loss": 0.094, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.9199288256227758, |
|
"grad_norm": 1.414320811836631, |
|
"learning_rate": 5.6272207582557195e-06, |
|
"loss": 0.1016, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.9217081850533808, |
|
"grad_norm": 1.593906515364087, |
|
"learning_rate": 5.61335366347171e-06, |
|
"loss": 0.0866, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.9234875444839857, |
|
"grad_norm": 1.727487713114126, |
|
"learning_rate": 5.599481777118071e-06, |
|
"loss": 0.1221, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.9252669039145908, |
|
"grad_norm": 1.1878152643827515, |
|
"learning_rate": 5.585605207563124e-06, |
|
"loss": 0.0697, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9270462633451957, |
|
"grad_norm": 1.5906338184112423, |
|
"learning_rate": 5.571724063211782e-06, |
|
"loss": 0.0996, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.9288256227758007, |
|
"grad_norm": 1.240105234338914, |
|
"learning_rate": 5.557838452504692e-06, |
|
"loss": 0.0781, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.9306049822064056, |
|
"grad_norm": 1.8281425075639033, |
|
"learning_rate": 5.5439484839173996e-06, |
|
"loss": 0.1116, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.9323843416370107, |
|
"grad_norm": 1.269844966046842, |
|
"learning_rate": 5.530054265959486e-06, |
|
"loss": 0.0892, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.9341637010676157, |
|
"grad_norm": 1.8435053522750624, |
|
"learning_rate": 5.516155907173735e-06, |
|
"loss": 0.1203, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.9359430604982206, |
|
"grad_norm": 1.4230121786249026, |
|
"learning_rate": 5.5022535161352764e-06, |
|
"loss": 0.1013, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.9377224199288257, |
|
"grad_norm": 1.4462622336979762, |
|
"learning_rate": 5.488347201450741e-06, |
|
"loss": 0.0906, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.9395017793594306, |
|
"grad_norm": 1.4803221434681082, |
|
"learning_rate": 5.47443707175741e-06, |
|
"loss": 0.1019, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.9412811387900356, |
|
"grad_norm": 1.4152868985763685, |
|
"learning_rate": 5.46052323572237e-06, |
|
"loss": 0.0891, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.9430604982206405, |
|
"grad_norm": 1.525211078276158, |
|
"learning_rate": 5.446605802041662e-06, |
|
"loss": 0.1075, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9448398576512456, |
|
"grad_norm": 1.2095316656394155, |
|
"learning_rate": 5.432684879439428e-06, |
|
"loss": 0.0791, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.9466192170818505, |
|
"grad_norm": 1.6981302816818773, |
|
"learning_rate": 5.418760576667071e-06, |
|
"loss": 0.0953, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.9483985765124555, |
|
"grad_norm": 1.5305641384895756, |
|
"learning_rate": 5.404833002502398e-06, |
|
"loss": 0.1051, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.9501779359430605, |
|
"grad_norm": 1.6194211007852162, |
|
"learning_rate": 5.39090226574877e-06, |
|
"loss": 0.0975, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.9519572953736655, |
|
"grad_norm": 1.7305782558239267, |
|
"learning_rate": 5.376968475234258e-06, |
|
"loss": 0.128, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.9537366548042705, |
|
"grad_norm": 1.0450794649816855, |
|
"learning_rate": 5.363031739810787e-06, |
|
"loss": 0.078, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.9555160142348754, |
|
"grad_norm": 1.19156604229689, |
|
"learning_rate": 5.349092168353291e-06, |
|
"loss": 0.0825, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.9572953736654805, |
|
"grad_norm": 1.3152694977899457, |
|
"learning_rate": 5.335149869758855e-06, |
|
"loss": 0.1002, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.9590747330960854, |
|
"grad_norm": 1.423723687781324, |
|
"learning_rate": 5.32120495294587e-06, |
|
"loss": 0.0955, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.9608540925266904, |
|
"grad_norm": 1.342676260393346, |
|
"learning_rate": 5.3072575268531835e-06, |
|
"loss": 0.1016, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9626334519572953, |
|
"grad_norm": 1.4674569629431873, |
|
"learning_rate": 5.293307700439242e-06, |
|
"loss": 0.1011, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.9644128113879004, |
|
"grad_norm": 1.9438181150811278, |
|
"learning_rate": 5.2793555826812456e-06, |
|
"loss": 0.111, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.9661921708185054, |
|
"grad_norm": 1.2945370529724887, |
|
"learning_rate": 5.265401282574294e-06, |
|
"loss": 0.0802, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.9679715302491103, |
|
"grad_norm": 1.8064959380260417, |
|
"learning_rate": 5.2514449091305375e-06, |
|
"loss": 0.1108, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.9697508896797153, |
|
"grad_norm": 1.3295956284448094, |
|
"learning_rate": 5.237486571378317e-06, |
|
"loss": 0.0893, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.9715302491103203, |
|
"grad_norm": 1.0779100532538188, |
|
"learning_rate": 5.22352637836133e-06, |
|
"loss": 0.0765, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.9733096085409253, |
|
"grad_norm": 1.2402001729441934, |
|
"learning_rate": 5.209564439137755e-06, |
|
"loss": 0.0771, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.9750889679715302, |
|
"grad_norm": 1.5747103886990548, |
|
"learning_rate": 5.195600862779421e-06, |
|
"loss": 0.1098, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.9768683274021353, |
|
"grad_norm": 1.3999237542595961, |
|
"learning_rate": 5.181635758370942e-06, |
|
"loss": 0.0875, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.9786476868327402, |
|
"grad_norm": 1.7567746869528922, |
|
"learning_rate": 5.167669235008871e-06, |
|
"loss": 0.1105, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9804270462633452, |
|
"grad_norm": 1.633067000043038, |
|
"learning_rate": 5.153701401800845e-06, |
|
"loss": 0.1153, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.9822064056939501, |
|
"grad_norm": 1.7783283171673896, |
|
"learning_rate": 5.139732367864736e-06, |
|
"loss": 0.0961, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.9839857651245552, |
|
"grad_norm": 1.2487630659042905, |
|
"learning_rate": 5.1257622423277934e-06, |
|
"loss": 0.0814, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.9857651245551602, |
|
"grad_norm": 1.4761507116182648, |
|
"learning_rate": 5.111791134325793e-06, |
|
"loss": 0.1034, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.9875444839857651, |
|
"grad_norm": 1.6971876863314106, |
|
"learning_rate": 5.097819153002192e-06, |
|
"loss": 0.1112, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.9893238434163701, |
|
"grad_norm": 1.1232763161143038, |
|
"learning_rate": 5.083846407507263e-06, |
|
"loss": 0.0675, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.9911032028469751, |
|
"grad_norm": 1.7804252788172585, |
|
"learning_rate": 5.0698730069972535e-06, |
|
"loss": 0.105, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.9928825622775801, |
|
"grad_norm": 1.3090009484948444, |
|
"learning_rate": 5.055899060633524e-06, |
|
"loss": 0.0782, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.994661921708185, |
|
"grad_norm": 1.4476976498034746, |
|
"learning_rate": 5.041924677581702e-06, |
|
"loss": 0.1115, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.99644128113879, |
|
"grad_norm": 1.536195880916628, |
|
"learning_rate": 5.0279499670108245e-06, |
|
"loss": 0.1041, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.998220640569395, |
|
"grad_norm": 1.3582834070352197, |
|
"learning_rate": 5.013975038092491e-06, |
|
"loss": 0.0822, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.3837021504339198, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0711, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.001779359430605, |
|
"grad_norm": 1.3110256886683114, |
|
"learning_rate": 4.98602496190751e-06, |
|
"loss": 0.0566, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.00355871886121, |
|
"grad_norm": 0.9674466454167672, |
|
"learning_rate": 4.9720500329891755e-06, |
|
"loss": 0.0476, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.0053380782918149, |
|
"grad_norm": 0.936470718489541, |
|
"learning_rate": 4.9580753224183005e-06, |
|
"loss": 0.0404, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.00711743772242, |
|
"grad_norm": 0.9276836160337036, |
|
"learning_rate": 4.944100939366478e-06, |
|
"loss": 0.0414, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.008896797153025, |
|
"grad_norm": 1.1734277726254059, |
|
"learning_rate": 4.930126993002748e-06, |
|
"loss": 0.0549, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.01067615658363, |
|
"grad_norm": 0.9692022077817103, |
|
"learning_rate": 4.9161535924927375e-06, |
|
"loss": 0.0493, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.0124555160142348, |
|
"grad_norm": 1.1491983335285068, |
|
"learning_rate": 4.90218084699781e-06, |
|
"loss": 0.0475, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.0142348754448398, |
|
"grad_norm": 1.0590429151253145, |
|
"learning_rate": 4.888208865674208e-06, |
|
"loss": 0.0395, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.0160142348754448, |
|
"grad_norm": 1.2977630739548132, |
|
"learning_rate": 4.874237757672209e-06, |
|
"loss": 0.0762, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.0177935943060499, |
|
"grad_norm": 1.0025300599666722, |
|
"learning_rate": 4.8602676321352646e-06, |
|
"loss": 0.0511, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.019572953736655, |
|
"grad_norm": 1.0062907186206536, |
|
"learning_rate": 4.846298598199155e-06, |
|
"loss": 0.0394, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.0213523131672597, |
|
"grad_norm": 1.3270429388395375, |
|
"learning_rate": 4.832330764991131e-06, |
|
"loss": 0.056, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.0231316725978647, |
|
"grad_norm": 1.5557666317784409, |
|
"learning_rate": 4.81836424162906e-06, |
|
"loss": 0.0666, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.0249110320284698, |
|
"grad_norm": 0.8483846160655948, |
|
"learning_rate": 4.80439913722058e-06, |
|
"loss": 0.0331, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.0266903914590748, |
|
"grad_norm": 1.2118752857193735, |
|
"learning_rate": 4.790435560862247e-06, |
|
"loss": 0.0447, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.0284697508896796, |
|
"grad_norm": 0.9924461748447586, |
|
"learning_rate": 4.776473621638673e-06, |
|
"loss": 0.0369, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.0302491103202847, |
|
"grad_norm": 1.1071197338366345, |
|
"learning_rate": 4.762513428621684e-06, |
|
"loss": 0.0396, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.0320284697508897, |
|
"grad_norm": 0.8748369558537417, |
|
"learning_rate": 4.748555090869464e-06, |
|
"loss": 0.0297, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.0338078291814947, |
|
"grad_norm": 1.352753780195708, |
|
"learning_rate": 4.734598717425706e-06, |
|
"loss": 0.0521, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.0355871886120998, |
|
"grad_norm": 1.6386530048292836, |
|
"learning_rate": 4.720644417318755e-06, |
|
"loss": 0.0685, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.0373665480427046, |
|
"grad_norm": 1.3418207413633394, |
|
"learning_rate": 4.70669229956076e-06, |
|
"loss": 0.0447, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.0391459074733096, |
|
"grad_norm": 1.280258380588223, |
|
"learning_rate": 4.692742473146818e-06, |
|
"loss": 0.0338, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.0409252669039146, |
|
"grad_norm": 1.1814483153992954, |
|
"learning_rate": 4.678795047054131e-06, |
|
"loss": 0.0401, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.0427046263345197, |
|
"grad_norm": 1.195918597179844, |
|
"learning_rate": 4.664850130241146e-06, |
|
"loss": 0.0386, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.0444839857651245, |
|
"grad_norm": 0.9653038581167326, |
|
"learning_rate": 4.650907831646711e-06, |
|
"loss": 0.0314, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.0462633451957295, |
|
"grad_norm": 1.1302392565174153, |
|
"learning_rate": 4.636968260189214e-06, |
|
"loss": 0.0445, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.0480427046263345, |
|
"grad_norm": 1.048588867844343, |
|
"learning_rate": 4.623031524765744e-06, |
|
"loss": 0.0382, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.0498220640569396, |
|
"grad_norm": 0.8772040369677692, |
|
"learning_rate": 4.609097734251231e-06, |
|
"loss": 0.0345, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.0516014234875444, |
|
"grad_norm": 2.041986577370149, |
|
"learning_rate": 4.595166997497605e-06, |
|
"loss": 0.0451, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.0533807829181494, |
|
"grad_norm": 1.363823772611878, |
|
"learning_rate": 4.58123942333293e-06, |
|
"loss": 0.0653, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.0551601423487544, |
|
"grad_norm": 1.3009927088430149, |
|
"learning_rate": 4.567315120560573e-06, |
|
"loss": 0.0447, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.0569395017793595, |
|
"grad_norm": 0.9170091763642279, |
|
"learning_rate": 4.553394197958339e-06, |
|
"loss": 0.0339, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.0587188612099645, |
|
"grad_norm": 1.0296609780231214, |
|
"learning_rate": 4.539476764277631e-06, |
|
"loss": 0.0394, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.0604982206405693, |
|
"grad_norm": 0.9535009237567651, |
|
"learning_rate": 4.525562928242592e-06, |
|
"loss": 0.0388, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.0622775800711743, |
|
"grad_norm": 1.2672761054356363, |
|
"learning_rate": 4.511652798549261e-06, |
|
"loss": 0.0538, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.0640569395017794, |
|
"grad_norm": 0.9940048147079016, |
|
"learning_rate": 4.497746483864725e-06, |
|
"loss": 0.0366, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.0658362989323844, |
|
"grad_norm": 0.969181545238689, |
|
"learning_rate": 4.483844092826267e-06, |
|
"loss": 0.0282, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.0676156583629894, |
|
"grad_norm": 0.9588906368525213, |
|
"learning_rate": 4.469945734040516e-06, |
|
"loss": 0.0325, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0676156583629894, |
|
"eval_loss": 0.10583119839429855, |
|
"eval_runtime": 7.105, |
|
"eval_samples_per_second": 6.474, |
|
"eval_steps_per_second": 1.689, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0693950177935942, |
|
"grad_norm": 1.557540236564698, |
|
"learning_rate": 4.456051516082603e-06, |
|
"loss": 0.0612, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.0711743772241993, |
|
"grad_norm": 1.5676988262423686, |
|
"learning_rate": 4.442161547495309e-06, |
|
"loss": 0.0619, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.0729537366548043, |
|
"grad_norm": 1.553762048979117, |
|
"learning_rate": 4.42827593678822e-06, |
|
"loss": 0.0439, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.0747330960854093, |
|
"grad_norm": 1.0721391008040864, |
|
"learning_rate": 4.414394792436877e-06, |
|
"loss": 0.0337, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.0765124555160142, |
|
"grad_norm": 1.121009951046993, |
|
"learning_rate": 4.400518222881931e-06, |
|
"loss": 0.0452, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.0782918149466192, |
|
"grad_norm": 1.3122504136315984, |
|
"learning_rate": 4.386646336528291e-06, |
|
"loss": 0.0536, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.0800711743772242, |
|
"grad_norm": 1.0144240824821742, |
|
"learning_rate": 4.372779241744282e-06, |
|
"loss": 0.0303, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.0818505338078293, |
|
"grad_norm": 1.4773248345833476, |
|
"learning_rate": 4.358917046860799e-06, |
|
"loss": 0.044, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.083629893238434, |
|
"grad_norm": 1.2449118200352698, |
|
"learning_rate": 4.345059860170458e-06, |
|
"loss": 0.0375, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.085409252669039, |
|
"grad_norm": 1.0559287204666719, |
|
"learning_rate": 4.331207789926746e-06, |
|
"loss": 0.0348, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0871886120996441, |
|
"grad_norm": 1.046328051115215, |
|
"learning_rate": 4.317360944343184e-06, |
|
"loss": 0.0404, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.0889679715302492, |
|
"grad_norm": 1.0266546582614653, |
|
"learning_rate": 4.303519431592479e-06, |
|
"loss": 0.0345, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.0907473309608542, |
|
"grad_norm": 1.1089548068989163, |
|
"learning_rate": 4.289683359805673e-06, |
|
"loss": 0.0352, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.092526690391459, |
|
"grad_norm": 1.5140879865791304, |
|
"learning_rate": 4.275852837071309e-06, |
|
"loss": 0.0587, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.094306049822064, |
|
"grad_norm": 0.9197262951108741, |
|
"learning_rate": 4.26202797143457e-06, |
|
"loss": 0.0262, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.096085409252669, |
|
"grad_norm": 1.3086179574541665, |
|
"learning_rate": 4.248208870896456e-06, |
|
"loss": 0.0401, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.097864768683274, |
|
"grad_norm": 0.85879753099663, |
|
"learning_rate": 4.234395643412925e-06, |
|
"loss": 0.0326, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.099644128113879, |
|
"grad_norm": 1.241973205260686, |
|
"learning_rate": 4.220588396894055e-06, |
|
"loss": 0.0435, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.101423487544484, |
|
"grad_norm": 1.1798658559230042, |
|
"learning_rate": 4.2067872392031965e-06, |
|
"loss": 0.0402, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.103202846975089, |
|
"grad_norm": 1.2910303670248457, |
|
"learning_rate": 4.192992278156141e-06, |
|
"loss": 0.0645, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.104982206405694, |
|
"grad_norm": 1.3420136979289303, |
|
"learning_rate": 4.179203621520262e-06, |
|
"loss": 0.0593, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.106761565836299, |
|
"grad_norm": 1.2593373492795998, |
|
"learning_rate": 4.165421377013691e-06, |
|
"loss": 0.0487, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.1085409252669038, |
|
"grad_norm": 1.059354892747715, |
|
"learning_rate": 4.151645652304465e-06, |
|
"loss": 0.0365, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.1103202846975089, |
|
"grad_norm": 1.2337138279970945, |
|
"learning_rate": 4.137876555009684e-06, |
|
"loss": 0.0425, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.112099644128114, |
|
"grad_norm": 1.3877591184023017, |
|
"learning_rate": 4.124114192694676e-06, |
|
"loss": 0.0536, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.113879003558719, |
|
"grad_norm": 0.9820002339976636, |
|
"learning_rate": 4.110358672872158e-06, |
|
"loss": 0.043, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.1156583629893237, |
|
"grad_norm": 1.5300543196844327, |
|
"learning_rate": 4.0966101030013915e-06, |
|
"loss": 0.0498, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.1174377224199288, |
|
"grad_norm": 0.9980330174002747, |
|
"learning_rate": 4.082868590487339e-06, |
|
"loss": 0.0334, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.1192170818505338, |
|
"grad_norm": 0.8869667167838485, |
|
"learning_rate": 4.069134242679837e-06, |
|
"loss": 0.0302, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.1209964412811388, |
|
"grad_norm": 1.171785702569316, |
|
"learning_rate": 4.055407166872748e-06, |
|
"loss": 0.0313, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.1227758007117439, |
|
"grad_norm": 0.9388190203075351, |
|
"learning_rate": 4.041687470303127e-06, |
|
"loss": 0.0293, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.1245551601423487, |
|
"grad_norm": 0.8766345377567041, |
|
"learning_rate": 4.02797526015038e-06, |
|
"loss": 0.0287, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.1263345195729537, |
|
"grad_norm": 0.8733916717465623, |
|
"learning_rate": 4.014270643535427e-06, |
|
"loss": 0.0294, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.1281138790035588, |
|
"grad_norm": 1.0498562115867607, |
|
"learning_rate": 4.000573727519868e-06, |
|
"loss": 0.0358, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.1298932384341638, |
|
"grad_norm": 1.1655867349112021, |
|
"learning_rate": 3.9868846191051465e-06, |
|
"loss": 0.0427, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.1316725978647686, |
|
"grad_norm": 1.3861670855441774, |
|
"learning_rate": 3.973203425231715e-06, |
|
"loss": 0.0574, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.1334519572953736, |
|
"grad_norm": 0.9610050317008267, |
|
"learning_rate": 3.959530252778187e-06, |
|
"loss": 0.0337, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.1352313167259787, |
|
"grad_norm": 1.5228893386654967, |
|
"learning_rate": 3.945865208560522e-06, |
|
"loss": 0.0634, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.1370106761565837, |
|
"grad_norm": 1.3279754161535577, |
|
"learning_rate": 3.932208399331177e-06, |
|
"loss": 0.0648, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.1387900355871885, |
|
"grad_norm": 1.2686525785616054, |
|
"learning_rate": 3.918559931778277e-06, |
|
"loss": 0.0558, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.1405693950177935, |
|
"grad_norm": 0.8455101672345104, |
|
"learning_rate": 3.904919912524784e-06, |
|
"loss": 0.0281, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.1423487544483986, |
|
"grad_norm": 1.0728198158687967, |
|
"learning_rate": 3.891288448127654e-06, |
|
"loss": 0.0424, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.1441281138790036, |
|
"grad_norm": 1.277720097359507, |
|
"learning_rate": 3.877665645077017e-06, |
|
"loss": 0.0412, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.1459074733096086, |
|
"grad_norm": 1.0972600320952652, |
|
"learning_rate": 3.86405160979534e-06, |
|
"loss": 0.0364, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.1476868327402134, |
|
"grad_norm": 0.7552252643326874, |
|
"learning_rate": 3.850446448636597e-06, |
|
"loss": 0.033, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.1494661921708185, |
|
"grad_norm": 0.9123403483968701, |
|
"learning_rate": 3.8368502678854296e-06, |
|
"loss": 0.0261, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.1512455516014235, |
|
"grad_norm": 1.053556123533311, |
|
"learning_rate": 3.8232631737563306e-06, |
|
"loss": 0.036, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.1530249110320285, |
|
"grad_norm": 1.1591426508879277, |
|
"learning_rate": 3.809685272392804e-06, |
|
"loss": 0.0419, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.1548042704626336, |
|
"grad_norm": 1.039917265953898, |
|
"learning_rate": 3.796116669866543e-06, |
|
"loss": 0.0412, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.1565836298932384, |
|
"grad_norm": 1.3135099471714666, |
|
"learning_rate": 3.78255747217659e-06, |
|
"loss": 0.0383, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.1583629893238434, |
|
"grad_norm": 1.0648180001000311, |
|
"learning_rate": 3.769007785248523e-06, |
|
"loss": 0.0491, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.1601423487544484, |
|
"grad_norm": 0.9844676980223385, |
|
"learning_rate": 3.7554677149336186e-06, |
|
"loss": 0.044, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.1619217081850535, |
|
"grad_norm": 1.0560422754045078, |
|
"learning_rate": 3.7419373670080284e-06, |
|
"loss": 0.0423, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.1637010676156583, |
|
"grad_norm": 1.1999821692086607, |
|
"learning_rate": 3.7284168471719527e-06, |
|
"loss": 0.0484, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.1654804270462633, |
|
"grad_norm": 1.2249800858961024, |
|
"learning_rate": 3.7149062610488085e-06, |
|
"loss": 0.0481, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.1672597864768683, |
|
"grad_norm": 1.1389746319144842, |
|
"learning_rate": 3.701405714184416e-06, |
|
"loss": 0.0418, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.1690391459074734, |
|
"grad_norm": 1.4232443299453386, |
|
"learning_rate": 3.687915312046166e-06, |
|
"loss": 0.0457, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.1708185053380782, |
|
"grad_norm": 1.1413651715186386, |
|
"learning_rate": 3.6744351600221994e-06, |
|
"loss": 0.0451, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.1725978647686832, |
|
"grad_norm": 1.0737977200053017, |
|
"learning_rate": 3.6609653634205773e-06, |
|
"loss": 0.0377, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.1743772241992882, |
|
"grad_norm": 1.2932789807052218, |
|
"learning_rate": 3.647506027468467e-06, |
|
"loss": 0.0411, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.1761565836298933, |
|
"grad_norm": 1.11824231273566, |
|
"learning_rate": 3.6340572573113176e-06, |
|
"loss": 0.034, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.1779359430604983, |
|
"grad_norm": 1.0426839415146034, |
|
"learning_rate": 3.6206191580120346e-06, |
|
"loss": 0.0483, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.1797153024911031, |
|
"grad_norm": 1.1499824529986604, |
|
"learning_rate": 3.6071918345501655e-06, |
|
"loss": 0.0497, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.1814946619217082, |
|
"grad_norm": 1.1372828846100003, |
|
"learning_rate": 3.5937753918210705e-06, |
|
"loss": 0.0274, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.1832740213523132, |
|
"grad_norm": 0.9886416376041564, |
|
"learning_rate": 3.5803699346351117e-06, |
|
"loss": 0.0294, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.1850533807829182, |
|
"grad_norm": 0.8446483136609211, |
|
"learning_rate": 3.566975567716833e-06, |
|
"loss": 0.0282, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.1868327402135233, |
|
"grad_norm": 0.9970011055642171, |
|
"learning_rate": 3.5535923957041374e-06, |
|
"loss": 0.0363, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.188612099644128, |
|
"grad_norm": 0.9985911443777412, |
|
"learning_rate": 3.540220523147474e-06, |
|
"loss": 0.0335, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.190391459074733, |
|
"grad_norm": 1.0804683437733276, |
|
"learning_rate": 3.5268600545090183e-06, |
|
"loss": 0.0365, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.1921708185053381, |
|
"grad_norm": 0.9820631514366019, |
|
"learning_rate": 3.513511094161858e-06, |
|
"loss": 0.0348, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.193950177935943, |
|
"grad_norm": 1.1981891799699902, |
|
"learning_rate": 3.5001737463891793e-06, |
|
"loss": 0.0435, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.195729537366548, |
|
"grad_norm": 1.057301583637597, |
|
"learning_rate": 3.4868481153834454e-06, |
|
"loss": 0.0376, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.197508896797153, |
|
"grad_norm": 1.0455633039976158, |
|
"learning_rate": 3.4735343052455905e-06, |
|
"loss": 0.0401, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.199288256227758, |
|
"grad_norm": 0.9421516612136216, |
|
"learning_rate": 3.4602324199842026e-06, |
|
"loss": 0.026, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.201067615658363, |
|
"grad_norm": 1.0828771440913072, |
|
"learning_rate": 3.446942563514711e-06, |
|
"loss": 0.0343, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.2028469750889679, |
|
"grad_norm": 1.1201330942862393, |
|
"learning_rate": 3.4336648396585777e-06, |
|
"loss": 0.0388, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.204626334519573, |
|
"grad_norm": 0.8999840911864917, |
|
"learning_rate": 3.4203993521424774e-06, |
|
"loss": 0.0338, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.206405693950178, |
|
"grad_norm": 0.95446781991142, |
|
"learning_rate": 3.407146204597499e-06, |
|
"loss": 0.0281, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.208185053380783, |
|
"grad_norm": 1.0132848548143234, |
|
"learning_rate": 3.3939055005583305e-06, |
|
"loss": 0.0339, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.209964412811388, |
|
"grad_norm": 1.142272778605831, |
|
"learning_rate": 3.3806773434624475e-06, |
|
"loss": 0.0477, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.2117437722419928, |
|
"grad_norm": 1.3835091302695752, |
|
"learning_rate": 3.3674618366493117e-06, |
|
"loss": 0.0528, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.2135231316725978, |
|
"grad_norm": 1.0578752828762186, |
|
"learning_rate": 3.3542590833595533e-06, |
|
"loss": 0.0371, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.2153024911032029, |
|
"grad_norm": 1.0868859451892747, |
|
"learning_rate": 3.341069186734176e-06, |
|
"loss": 0.0337, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.217081850533808, |
|
"grad_norm": 0.923026450789917, |
|
"learning_rate": 3.3278922498137455e-06, |
|
"loss": 0.0424, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.2188612099644127, |
|
"grad_norm": 1.0840367443587038, |
|
"learning_rate": 3.314728375537587e-06, |
|
"loss": 0.0357, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.2206405693950177, |
|
"grad_norm": 1.2913243654606925, |
|
"learning_rate": 3.3015776667429724e-06, |
|
"loss": 0.0527, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.2224199288256228, |
|
"grad_norm": 1.2016511478194003, |
|
"learning_rate": 3.2884402261643296e-06, |
|
"loss": 0.049, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.2241992882562278, |
|
"grad_norm": 0.8294059655064291, |
|
"learning_rate": 3.2753161564324344e-06, |
|
"loss": 0.0241, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.2259786476868326, |
|
"grad_norm": 1.1895066328882287, |
|
"learning_rate": 3.262205560073605e-06, |
|
"loss": 0.0346, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.2277580071174377, |
|
"grad_norm": 0.9441298892524774, |
|
"learning_rate": 3.249108539508909e-06, |
|
"loss": 0.033, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.2295373665480427, |
|
"grad_norm": 1.5221913307620616, |
|
"learning_rate": 3.2360251970533527e-06, |
|
"loss": 0.0555, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.2313167259786477, |
|
"grad_norm": 0.9894210219110501, |
|
"learning_rate": 3.2229556349150947e-06, |
|
"loss": 0.0341, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.2330960854092528, |
|
"grad_norm": 1.002617147132954, |
|
"learning_rate": 3.2098999551946337e-06, |
|
"loss": 0.0364, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.2348754448398576, |
|
"grad_norm": 1.3924997865217303, |
|
"learning_rate": 3.1968582598840234e-06, |
|
"loss": 0.0554, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.2366548042704626, |
|
"grad_norm": 1.4744194294447412, |
|
"learning_rate": 3.183830650866068e-06, |
|
"loss": 0.0417, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.2384341637010676, |
|
"grad_norm": 0.9488604375135772, |
|
"learning_rate": 3.1708172299135266e-06, |
|
"loss": 0.0331, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.2402135231316727, |
|
"grad_norm": 2.078021201733235, |
|
"learning_rate": 3.1578180986883234e-06, |
|
"loss": 0.0813, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.2419928825622777, |
|
"grad_norm": 1.0019805867253004, |
|
"learning_rate": 3.1448333587407486e-06, |
|
"loss": 0.0399, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.2437722419928825, |
|
"grad_norm": 0.9950877488512859, |
|
"learning_rate": 3.131863111508667e-06, |
|
"loss": 0.0336, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.2455516014234875, |
|
"grad_norm": 0.9678012969359862, |
|
"learning_rate": 3.118907458316722e-06, |
|
"loss": 0.0391, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.2473309608540926, |
|
"grad_norm": 1.0962389774923096, |
|
"learning_rate": 3.105966500375551e-06, |
|
"loss": 0.0304, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.2491103202846976, |
|
"grad_norm": 1.1239325592676057, |
|
"learning_rate": 3.0930403387809892e-06, |
|
"loss": 0.0394, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.2508896797153026, |
|
"grad_norm": 0.9892818269097701, |
|
"learning_rate": 3.080129074513285e-06, |
|
"loss": 0.0361, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.2526690391459074, |
|
"grad_norm": 1.2186919155427427, |
|
"learning_rate": 3.067232808436299e-06, |
|
"loss": 0.0447, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.2544483985765125, |
|
"grad_norm": 1.1633708014276039, |
|
"learning_rate": 3.0543516412967327e-06, |
|
"loss": 0.0422, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.2562277580071175, |
|
"grad_norm": 1.332312712909846, |
|
"learning_rate": 3.041485673723331e-06, |
|
"loss": 0.048, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.2580071174377223, |
|
"grad_norm": 1.0630923149292173, |
|
"learning_rate": 3.0286350062261017e-06, |
|
"loss": 0.0374, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.2597864768683273, |
|
"grad_norm": 0.7810424814883251, |
|
"learning_rate": 3.0157997391955172e-06, |
|
"loss": 0.03, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.2615658362989324, |
|
"grad_norm": 1.43922072355609, |
|
"learning_rate": 3.0029799729017518e-06, |
|
"loss": 0.0466, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.2633451957295374, |
|
"grad_norm": 1.0108967324893467, |
|
"learning_rate": 2.9901758074938797e-06, |
|
"loss": 0.0352, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.2651245551601424, |
|
"grad_norm": 1.3031937833721658, |
|
"learning_rate": 2.977387342999103e-06, |
|
"loss": 0.0481, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.2669039145907472, |
|
"grad_norm": 1.3203694286813317, |
|
"learning_rate": 2.964614679321966e-06, |
|
"loss": 0.0511, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.2686832740213523, |
|
"grad_norm": 1.2174405499307732, |
|
"learning_rate": 2.951857916243574e-06, |
|
"loss": 0.0458, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.2704626334519573, |
|
"grad_norm": 1.2345540196540834, |
|
"learning_rate": 2.9391171534208185e-06, |
|
"loss": 0.0392, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.2722419928825623, |
|
"grad_norm": 1.4121365654173932, |
|
"learning_rate": 2.9263924903855932e-06, |
|
"loss": 0.0492, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.2740213523131674, |
|
"grad_norm": 1.1998077057371068, |
|
"learning_rate": 2.9136840265440213e-06, |
|
"loss": 0.0362, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.2758007117437722, |
|
"grad_norm": 1.1205654210796203, |
|
"learning_rate": 2.9009918611756732e-06, |
|
"loss": 0.0358, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.2775800711743772, |
|
"grad_norm": 1.1331974337111583, |
|
"learning_rate": 2.8883160934327968e-06, |
|
"loss": 0.0376, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.2793594306049823, |
|
"grad_norm": 1.2596525742904814, |
|
"learning_rate": 2.8756568223395396e-06, |
|
"loss": 0.0463, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.281138790035587, |
|
"grad_norm": 1.0082532702496807, |
|
"learning_rate": 2.8630141467911777e-06, |
|
"loss": 0.0418, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.282918149466192, |
|
"grad_norm": 1.1619865293081777, |
|
"learning_rate": 2.8503881655533395e-06, |
|
"loss": 0.0375, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.2846975088967971, |
|
"grad_norm": 1.2770965692484384, |
|
"learning_rate": 2.837778977261235e-06, |
|
"loss": 0.0408, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.2864768683274022, |
|
"grad_norm": 2.082337708580707, |
|
"learning_rate": 2.8251866804188875e-06, |
|
"loss": 0.0685, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.2882562277580072, |
|
"grad_norm": 1.2247124039908655, |
|
"learning_rate": 2.812611373398365e-06, |
|
"loss": 0.0467, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.290035587188612, |
|
"grad_norm": 1.2620175190676493, |
|
"learning_rate": 2.8000531544390064e-06, |
|
"loss": 0.0482, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.291814946619217, |
|
"grad_norm": 1.0747320256424733, |
|
"learning_rate": 2.7875121216466595e-06, |
|
"loss": 0.0344, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.293594306049822, |
|
"grad_norm": 1.8298784699119703, |
|
"learning_rate": 2.7749883729929105e-06, |
|
"loss": 0.0661, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.295373665480427, |
|
"grad_norm": 1.0821194568990946, |
|
"learning_rate": 2.762482006314324e-06, |
|
"loss": 0.0436, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.2971530249110321, |
|
"grad_norm": 1.042315472100243, |
|
"learning_rate": 2.7499931193116692e-06, |
|
"loss": 0.0359, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.298932384341637, |
|
"grad_norm": 1.0491178901958387, |
|
"learning_rate": 2.737521809549167e-06, |
|
"loss": 0.0353, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.300711743772242, |
|
"grad_norm": 1.133019044124063, |
|
"learning_rate": 2.725068174453722e-06, |
|
"loss": 0.0498, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.302491103202847, |
|
"grad_norm": 1.1345710407796288, |
|
"learning_rate": 2.712632311314165e-06, |
|
"loss": 0.0343, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.304270462633452, |
|
"grad_norm": 1.1907915789301322, |
|
"learning_rate": 2.7002143172804875e-06, |
|
"loss": 0.036, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.306049822064057, |
|
"grad_norm": 1.0494987334082084, |
|
"learning_rate": 2.6878142893630904e-06, |
|
"loss": 0.0362, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.3078291814946619, |
|
"grad_norm": 0.9695464000351004, |
|
"learning_rate": 2.6754323244320154e-06, |
|
"loss": 0.042, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.309608540925267, |
|
"grad_norm": 1.0311354529624681, |
|
"learning_rate": 2.6630685192161995e-06, |
|
"loss": 0.0398, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.311387900355872, |
|
"grad_norm": 1.084840709804411, |
|
"learning_rate": 2.650722970302714e-06, |
|
"loss": 0.0376, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.3131672597864767, |
|
"grad_norm": 1.5207534436339651, |
|
"learning_rate": 2.638395774136009e-06, |
|
"loss": 0.0577, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.3149466192170818, |
|
"grad_norm": 1.2265208197263842, |
|
"learning_rate": 2.6260870270171645e-06, |
|
"loss": 0.0307, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.3167259786476868, |
|
"grad_norm": 1.2048718444508941, |
|
"learning_rate": 2.613796825103129e-06, |
|
"loss": 0.046, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.3185053380782918, |
|
"grad_norm": 0.7406552314801639, |
|
"learning_rate": 2.60152526440598e-06, |
|
"loss": 0.0214, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.3202846975088969, |
|
"grad_norm": 0.8703119232425903, |
|
"learning_rate": 2.5892724407921667e-06, |
|
"loss": 0.0268, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.3220640569395017, |
|
"grad_norm": 1.4092377951144108, |
|
"learning_rate": 2.577038449981763e-06, |
|
"loss": 0.0532, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.3238434163701067, |
|
"grad_norm": 1.2338092106164478, |
|
"learning_rate": 2.564823387547716e-06, |
|
"loss": 0.0458, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.3256227758007118, |
|
"grad_norm": 1.1860269666747967, |
|
"learning_rate": 2.552627348915106e-06, |
|
"loss": 0.0489, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.3274021352313168, |
|
"grad_norm": 1.3831881289107064, |
|
"learning_rate": 2.5404504293603983e-06, |
|
"loss": 0.0541, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.3291814946619218, |
|
"grad_norm": 1.3978150003875889, |
|
"learning_rate": 2.528292724010697e-06, |
|
"loss": 0.0522, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.3309608540925266, |
|
"grad_norm": 0.8393396866757633, |
|
"learning_rate": 2.5161543278430055e-06, |
|
"loss": 0.0352, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.3327402135231317, |
|
"grad_norm": 1.2209103911348818, |
|
"learning_rate": 2.5040353356834756e-06, |
|
"loss": 0.0451, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.3345195729537367, |
|
"grad_norm": 1.2628382648253456, |
|
"learning_rate": 2.4919358422066816e-06, |
|
"loss": 0.0426, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.3362989323843417, |
|
"grad_norm": 0.976449963217236, |
|
"learning_rate": 2.4798559419348672e-06, |
|
"loss": 0.0308, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.3380782918149468, |
|
"grad_norm": 1.044800723672327, |
|
"learning_rate": 2.4677957292372166e-06, |
|
"loss": 0.0377, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.3398576512455516, |
|
"grad_norm": 1.1124481963004607, |
|
"learning_rate": 2.455755298329107e-06, |
|
"loss": 0.0425, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.3416370106761566, |
|
"grad_norm": 0.8766779898973657, |
|
"learning_rate": 2.4437347432713838e-06, |
|
"loss": 0.0325, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.3434163701067616, |
|
"grad_norm": 1.3576146479858933, |
|
"learning_rate": 2.431734157969619e-06, |
|
"loss": 0.0491, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.3451957295373664, |
|
"grad_norm": 1.0681139826922712, |
|
"learning_rate": 2.4197536361733792e-06, |
|
"loss": 0.0494, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.3469750889679715, |
|
"grad_norm": 1.0900639218622725, |
|
"learning_rate": 2.407793271475495e-06, |
|
"loss": 0.0389, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.3487544483985765, |
|
"grad_norm": 1.042140428989945, |
|
"learning_rate": 2.3958531573113223e-06, |
|
"loss": 0.0345, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.3505338078291815, |
|
"grad_norm": 1.1563320564651571, |
|
"learning_rate": 2.3839333869580243e-06, |
|
"loss": 0.0417, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.3523131672597866, |
|
"grad_norm": 0.951416796141525, |
|
"learning_rate": 2.372034053533835e-06, |
|
"loss": 0.0231, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.3540925266903914, |
|
"grad_norm": 0.8969561484746402, |
|
"learning_rate": 2.360155249997334e-06, |
|
"loss": 0.0352, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.3558718861209964, |
|
"grad_norm": 1.2844502948649295, |
|
"learning_rate": 2.348297069146715e-06, |
|
"loss": 0.0455, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.3576512455516014, |
|
"grad_norm": 0.7062232019279094, |
|
"learning_rate": 2.3364596036190706e-06, |
|
"loss": 0.0227, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.3594306049822065, |
|
"grad_norm": 1.047542522465205, |
|
"learning_rate": 2.3246429458896637e-06, |
|
"loss": 0.0393, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.3612099644128115, |
|
"grad_norm": 0.9725654270458679, |
|
"learning_rate": 2.312847188271203e-06, |
|
"loss": 0.0427, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.3629893238434163, |
|
"grad_norm": 1.065816524018262, |
|
"learning_rate": 2.301072422913123e-06, |
|
"loss": 0.0377, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.3647686832740213, |
|
"grad_norm": 0.8673764763816904, |
|
"learning_rate": 2.2893187418008666e-06, |
|
"loss": 0.0243, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.3665480427046264, |
|
"grad_norm": 0.911175468114547, |
|
"learning_rate": 2.2775862367551642e-06, |
|
"loss": 0.0323, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.3683274021352312, |
|
"grad_norm": 1.0087905113337534, |
|
"learning_rate": 2.265874999431318e-06, |
|
"loss": 0.0331, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.3701067615658362, |
|
"grad_norm": 1.4622138395884272, |
|
"learning_rate": 2.254185121318484e-06, |
|
"loss": 0.0422, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.3718861209964412, |
|
"grad_norm": 1.0398143526812824, |
|
"learning_rate": 2.2425166937389596e-06, |
|
"loss": 0.0389, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.3736654804270463, |
|
"grad_norm": 0.9497389125515033, |
|
"learning_rate": 2.2308698078474645e-06, |
|
"loss": 0.0404, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.3754448398576513, |
|
"grad_norm": 1.0549679968283046, |
|
"learning_rate": 2.219244554630438e-06, |
|
"loss": 0.0412, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.3772241992882561, |
|
"grad_norm": 1.0043105008310376, |
|
"learning_rate": 2.207641024905322e-06, |
|
"loss": 0.0303, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.3790035587188612, |
|
"grad_norm": 0.9558156743047374, |
|
"learning_rate": 2.1960593093198508e-06, |
|
"loss": 0.031, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.3807829181494662, |
|
"grad_norm": 1.0036151176920738, |
|
"learning_rate": 2.184499498351347e-06, |
|
"loss": 0.0389, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.3825622775800712, |
|
"grad_norm": 0.953995369852846, |
|
"learning_rate": 2.172961682306011e-06, |
|
"loss": 0.0319, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.3843416370106763, |
|
"grad_norm": 1.4070105530365404, |
|
"learning_rate": 2.1614459513182173e-06, |
|
"loss": 0.0472, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.386120996441281, |
|
"grad_norm": 1.1769529378951638, |
|
"learning_rate": 2.149952395349813e-06, |
|
"loss": 0.034, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.387900355871886, |
|
"grad_norm": 1.5326873490460138, |
|
"learning_rate": 2.1384811041894055e-06, |
|
"loss": 0.0625, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.3896797153024911, |
|
"grad_norm": 1.0927862709364635, |
|
"learning_rate": 2.1270321674516736e-06, |
|
"loss": 0.0353, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.3914590747330962, |
|
"grad_norm": 1.1685499642911594, |
|
"learning_rate": 2.1156056745766593e-06, |
|
"loss": 0.0311, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.3932384341637012, |
|
"grad_norm": 1.1440317821731667, |
|
"learning_rate": 2.104201714829074e-06, |
|
"loss": 0.0519, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.395017793594306, |
|
"grad_norm": 1.1913543473519337, |
|
"learning_rate": 2.0928203772975917e-06, |
|
"loss": 0.0386, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.396797153024911, |
|
"grad_norm": 1.0574982400372954, |
|
"learning_rate": 2.081461750894166e-06, |
|
"loss": 0.0372, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.398576512455516, |
|
"grad_norm": 1.0339424499797196, |
|
"learning_rate": 2.070125924353328e-06, |
|
"loss": 0.033, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.4003558718861209, |
|
"grad_norm": 1.0026450828087805, |
|
"learning_rate": 2.058812986231493e-06, |
|
"loss": 0.0387, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.402135231316726, |
|
"grad_norm": 1.0986279328743036, |
|
"learning_rate": 2.0475230249062727e-06, |
|
"loss": 0.0387, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.403914590747331, |
|
"grad_norm": 1.0153621401507438, |
|
"learning_rate": 2.0362561285757766e-06, |
|
"loss": 0.0427, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.405693950177936, |
|
"grad_norm": 1.0543193871513072, |
|
"learning_rate": 2.0250123852579347e-06, |
|
"loss": 0.0416, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.407473309608541, |
|
"grad_norm": 0.8982894726021952, |
|
"learning_rate": 2.013791882789801e-06, |
|
"loss": 0.0332, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.4092526690391458, |
|
"grad_norm": 0.764278598078205, |
|
"learning_rate": 2.0025947088268714e-06, |
|
"loss": 0.0214, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.4110320284697508, |
|
"grad_norm": 1.1312386366121716, |
|
"learning_rate": 1.9914209508423943e-06, |
|
"loss": 0.0355, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.4128113879003559, |
|
"grad_norm": 1.845461658308916, |
|
"learning_rate": 1.9802706961266936e-06, |
|
"loss": 0.0328, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.414590747330961, |
|
"grad_norm": 1.4441491631416752, |
|
"learning_rate": 1.969144031786483e-06, |
|
"loss": 0.05, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.416370106761566, |
|
"grad_norm": 0.9832445602247802, |
|
"learning_rate": 1.958041044744186e-06, |
|
"loss": 0.0432, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.4181494661921707, |
|
"grad_norm": 0.9992019909245634, |
|
"learning_rate": 1.94696182173726e-06, |
|
"loss": 0.0296, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.4199288256227758, |
|
"grad_norm": 1.1131949231431502, |
|
"learning_rate": 1.9359064493175077e-06, |
|
"loss": 0.0417, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.4217081850533808, |
|
"grad_norm": 0.8124196747997232, |
|
"learning_rate": 1.9248750138504176e-06, |
|
"loss": 0.0324, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.4234875444839858, |
|
"grad_norm": 1.105657266423107, |
|
"learning_rate": 1.9138676015144765e-06, |
|
"loss": 0.0278, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.4234875444839858, |
|
"eval_loss": 0.10011839866638184, |
|
"eval_runtime": 7.101, |
|
"eval_samples_per_second": 6.478, |
|
"eval_steps_per_second": 1.69, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.4252669039145909, |
|
"grad_norm": 1.1223048480525009, |
|
"learning_rate": 1.9028842983005036e-06, |
|
"loss": 0.0451, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.4270462633451957, |
|
"grad_norm": 1.4205127346023394, |
|
"learning_rate": 1.8919251900109697e-06, |
|
"loss": 0.0504, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.4288256227758007, |
|
"grad_norm": 0.9551371900335774, |
|
"learning_rate": 1.8809903622593395e-06, |
|
"loss": 0.0316, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.4306049822064058, |
|
"grad_norm": 0.9603210270587519, |
|
"learning_rate": 1.870079900469392e-06, |
|
"loss": 0.0331, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.4323843416370106, |
|
"grad_norm": 1.1441463847442797, |
|
"learning_rate": 1.8591938898745593e-06, |
|
"loss": 0.0458, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.4341637010676156, |
|
"grad_norm": 1.040056545705429, |
|
"learning_rate": 1.8483324155172594e-06, |
|
"loss": 0.0366, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.4359430604982206, |
|
"grad_norm": 1.0254357125248748, |
|
"learning_rate": 1.837495562248226e-06, |
|
"loss": 0.04, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.4377224199288257, |
|
"grad_norm": 1.196513107538604, |
|
"learning_rate": 1.8266834147258577e-06, |
|
"loss": 0.0419, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.4395017793594307, |
|
"grad_norm": 0.9973737532812229, |
|
"learning_rate": 1.8158960574155455e-06, |
|
"loss": 0.039, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.4412811387900355, |
|
"grad_norm": 1.260661368620348, |
|
"learning_rate": 1.8051335745890196e-06, |
|
"loss": 0.0363, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.4430604982206405, |
|
"grad_norm": 1.23676879977996, |
|
"learning_rate": 1.7943960503236856e-06, |
|
"loss": 0.0494, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.4448398576512456, |
|
"grad_norm": 1.0625123538977326, |
|
"learning_rate": 1.7836835685019732e-06, |
|
"loss": 0.0332, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.4466192170818506, |
|
"grad_norm": 0.9472292065029239, |
|
"learning_rate": 1.7729962128106787e-06, |
|
"loss": 0.0323, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.4483985765124556, |
|
"grad_norm": 1.4248192571994371, |
|
"learning_rate": 1.7623340667403089e-06, |
|
"loss": 0.0315, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.4501779359430604, |
|
"grad_norm": 1.4293948516636514, |
|
"learning_rate": 1.7516972135844352e-06, |
|
"loss": 0.0472, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.4519572953736655, |
|
"grad_norm": 0.9423611909080196, |
|
"learning_rate": 1.741085736439031e-06, |
|
"loss": 0.031, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.4537366548042705, |
|
"grad_norm": 1.112081949124178, |
|
"learning_rate": 1.730499718201838e-06, |
|
"loss": 0.0338, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.4555160142348753, |
|
"grad_norm": 1.0649779134520136, |
|
"learning_rate": 1.7199392415717064e-06, |
|
"loss": 0.0345, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.4572953736654806, |
|
"grad_norm": 1.0025619247353015, |
|
"learning_rate": 1.7094043890479557e-06, |
|
"loss": 0.0349, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.4590747330960854, |
|
"grad_norm": 0.9757537337037903, |
|
"learning_rate": 1.698895242929725e-06, |
|
"loss": 0.0324, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.4608540925266904, |
|
"grad_norm": 1.0266548343772863, |
|
"learning_rate": 1.6884118853153358e-06, |
|
"loss": 0.0334, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.4626334519572954, |
|
"grad_norm": 1.1103625592225188, |
|
"learning_rate": 1.6779543981016478e-06, |
|
"loss": 0.0344, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.4644128113879002, |
|
"grad_norm": 1.0274849399572612, |
|
"learning_rate": 1.6675228629834133e-06, |
|
"loss": 0.0313, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.4661921708185053, |
|
"grad_norm": 0.8735158300602728, |
|
"learning_rate": 1.657117361452651e-06, |
|
"loss": 0.034, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.4679715302491103, |
|
"grad_norm": 0.9133025030407297, |
|
"learning_rate": 1.6467379747980011e-06, |
|
"loss": 0.0337, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.4697508896797153, |
|
"grad_norm": 0.8647627812106672, |
|
"learning_rate": 1.6363847841040914e-06, |
|
"loss": 0.027, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.4715302491103204, |
|
"grad_norm": 1.0457503277435742, |
|
"learning_rate": 1.626057870250906e-06, |
|
"loss": 0.0362, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.4733096085409252, |
|
"grad_norm": 0.9201434240593671, |
|
"learning_rate": 1.6157573139131527e-06, |
|
"loss": 0.0324, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.4750889679715302, |
|
"grad_norm": 0.9025907510187846, |
|
"learning_rate": 1.605483195559628e-06, |
|
"loss": 0.0308, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.4768683274021353, |
|
"grad_norm": 1.1239748000474326, |
|
"learning_rate": 1.5952355954525966e-06, |
|
"loss": 0.0385, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.4786476868327403, |
|
"grad_norm": 0.926934155029106, |
|
"learning_rate": 1.5850145936471607e-06, |
|
"loss": 0.0295, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.4804270462633453, |
|
"grad_norm": 1.180459911662068, |
|
"learning_rate": 1.5748202699906335e-06, |
|
"loss": 0.0424, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.4822064056939501, |
|
"grad_norm": 1.1055087892951438, |
|
"learning_rate": 1.5646527041219128e-06, |
|
"loss": 0.0346, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.4839857651245552, |
|
"grad_norm": 0.8184632569092138, |
|
"learning_rate": 1.5545119754708682e-06, |
|
"loss": 0.0277, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.4857651245551602, |
|
"grad_norm": 1.2935255738057154, |
|
"learning_rate": 1.544398163257711e-06, |
|
"loss": 0.0362, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.487544483985765, |
|
"grad_norm": 0.8282668217520502, |
|
"learning_rate": 1.5343113464923808e-06, |
|
"loss": 0.0273, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.48932384341637, |
|
"grad_norm": 0.9687052531013239, |
|
"learning_rate": 1.524251603973927e-06, |
|
"loss": 0.0294, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.491103202846975, |
|
"grad_norm": 0.9369324470401119, |
|
"learning_rate": 1.5142190142898883e-06, |
|
"loss": 0.0248, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.49288256227758, |
|
"grad_norm": 1.340795844446974, |
|
"learning_rate": 1.5042136558156883e-06, |
|
"loss": 0.0418, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.4946619217081851, |
|
"grad_norm": 1.04178677208075, |
|
"learning_rate": 1.4942356067140162e-06, |
|
"loss": 0.0426, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.49644128113879, |
|
"grad_norm": 0.7815078913435625, |
|
"learning_rate": 1.4842849449342195e-06, |
|
"loss": 0.0268, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.498220640569395, |
|
"grad_norm": 1.0268859578445484, |
|
"learning_rate": 1.4743617482116896e-06, |
|
"loss": 0.0342, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.4179528664654018, |
|
"learning_rate": 1.4644660940672628e-06, |
|
"loss": 0.0414, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.501779359430605, |
|
"grad_norm": 1.2882672733486225, |
|
"learning_rate": 1.454598059806609e-06, |
|
"loss": 0.052, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.50355871886121, |
|
"grad_norm": 1.239733062685239, |
|
"learning_rate": 1.4447577225196296e-06, |
|
"loss": 0.0386, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.5053380782918149, |
|
"grad_norm": 1.1205130913664423, |
|
"learning_rate": 1.4349451590798564e-06, |
|
"loss": 0.0406, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.50711743772242, |
|
"grad_norm": 1.051210484845322, |
|
"learning_rate": 1.4251604461438444e-06, |
|
"loss": 0.0362, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.508896797153025, |
|
"grad_norm": 0.8847322952224552, |
|
"learning_rate": 1.4154036601505834e-06, |
|
"loss": 0.0272, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.5106761565836297, |
|
"grad_norm": 1.0619766922271963, |
|
"learning_rate": 1.4056748773208933e-06, |
|
"loss": 0.0401, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.512455516014235, |
|
"grad_norm": 1.1060287420934807, |
|
"learning_rate": 1.3959741736568339e-06, |
|
"loss": 0.0404, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.5142348754448398, |
|
"grad_norm": 0.8186830800204293, |
|
"learning_rate": 1.3863016249411027e-06, |
|
"loss": 0.0284, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.5160142348754448, |
|
"grad_norm": 0.9811209567376619, |
|
"learning_rate": 1.376657306736453e-06, |
|
"loss": 0.0338, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.5177935943060499, |
|
"grad_norm": 1.0166513565401378, |
|
"learning_rate": 1.3670412943850975e-06, |
|
"loss": 0.0322, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.5195729537366547, |
|
"grad_norm": 1.0613533337993255, |
|
"learning_rate": 1.3574536630081208e-06, |
|
"loss": 0.0345, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.52135231316726, |
|
"grad_norm": 1.298699049979455, |
|
"learning_rate": 1.347894487504896e-06, |
|
"loss": 0.0494, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.5231316725978647, |
|
"grad_norm": 0.9174175793366299, |
|
"learning_rate": 1.3383638425524909e-06, |
|
"loss": 0.0279, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.5249110320284698, |
|
"grad_norm": 0.965437792961804, |
|
"learning_rate": 1.3288618026050943e-06, |
|
"loss": 0.034, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.5266903914590748, |
|
"grad_norm": 1.0669747654101223, |
|
"learning_rate": 1.31938844189343e-06, |
|
"loss": 0.029, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.5284697508896796, |
|
"grad_norm": 1.3030868224716827, |
|
"learning_rate": 1.3099438344241777e-06, |
|
"loss": 0.0468, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.5302491103202847, |
|
"grad_norm": 1.0513380986970275, |
|
"learning_rate": 1.3005280539793908e-06, |
|
"loss": 0.0356, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.5320284697508897, |
|
"grad_norm": 1.087731657244315, |
|
"learning_rate": 1.2911411741159273e-06, |
|
"loss": 0.0494, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.5338078291814945, |
|
"grad_norm": 0.8823004493051066, |
|
"learning_rate": 1.2817832681648712e-06, |
|
"loss": 0.0344, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.5355871886120998, |
|
"grad_norm": 1.132213682003108, |
|
"learning_rate": 1.2724544092309581e-06, |
|
"loss": 0.0438, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.5373665480427046, |
|
"grad_norm": 0.9056107608256295, |
|
"learning_rate": 1.2631546701920073e-06, |
|
"loss": 0.0243, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.5391459074733096, |
|
"grad_norm": 1.0966686358672375, |
|
"learning_rate": 1.2538841236983519e-06, |
|
"loss": 0.0436, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.5409252669039146, |
|
"grad_norm": 1.3981334959221237, |
|
"learning_rate": 1.244642842172266e-06, |
|
"loss": 0.0424, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.5427046263345194, |
|
"grad_norm": 1.0011277140247434, |
|
"learning_rate": 1.2354308978074088e-06, |
|
"loss": 0.0382, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.5444839857651247, |
|
"grad_norm": 1.068070314434391, |
|
"learning_rate": 1.2262483625682514e-06, |
|
"loss": 0.0358, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.5462633451957295, |
|
"grad_norm": 0.9081834043759001, |
|
"learning_rate": 1.2170953081895214e-06, |
|
"loss": 0.0302, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.5480427046263345, |
|
"grad_norm": 1.1560998983516815, |
|
"learning_rate": 1.2079718061756369e-06, |
|
"loss": 0.0368, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.5498220640569396, |
|
"grad_norm": 1.1106398597054559, |
|
"learning_rate": 1.1988779278001517e-06, |
|
"loss": 0.0308, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.5516014234875444, |
|
"grad_norm": 0.8800192309569215, |
|
"learning_rate": 1.1898137441051982e-06, |
|
"loss": 0.0265, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.5533807829181496, |
|
"grad_norm": 1.1190822711373576, |
|
"learning_rate": 1.1807793259009282e-06, |
|
"loss": 0.0463, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.5551601423487544, |
|
"grad_norm": 1.1792104977286275, |
|
"learning_rate": 1.1717747437649657e-06, |
|
"loss": 0.04, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.5569395017793595, |
|
"grad_norm": 1.126018495041347, |
|
"learning_rate": 1.1628000680418533e-06, |
|
"loss": 0.034, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.5587188612099645, |
|
"grad_norm": 0.8248236560275165, |
|
"learning_rate": 1.1538553688425002e-06, |
|
"loss": 0.0243, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.5604982206405693, |
|
"grad_norm": 1.1587019887937444, |
|
"learning_rate": 1.14494071604364e-06, |
|
"loss": 0.0398, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.5622775800711743, |
|
"grad_norm": 1.0273512446925177, |
|
"learning_rate": 1.1360561792872754e-06, |
|
"loss": 0.027, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.5640569395017794, |
|
"grad_norm": 1.3153238589857665, |
|
"learning_rate": 1.127201827980145e-06, |
|
"loss": 0.0337, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.5658362989323842, |
|
"grad_norm": 0.9699670080209463, |
|
"learning_rate": 1.1183777312931748e-06, |
|
"loss": 0.0269, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.5676156583629894, |
|
"grad_norm": 0.7514798550481694, |
|
"learning_rate": 1.1095839581609407e-06, |
|
"loss": 0.0208, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.5693950177935942, |
|
"grad_norm": 1.149225029646793, |
|
"learning_rate": 1.1008205772811248e-06, |
|
"loss": 0.0427, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.5711743772241993, |
|
"grad_norm": 1.3121126133489633, |
|
"learning_rate": 1.0920876571139843e-06, |
|
"loss": 0.0529, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.5729537366548043, |
|
"grad_norm": 0.7797748914772957, |
|
"learning_rate": 1.0833852658818167e-06, |
|
"loss": 0.0336, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.5747330960854091, |
|
"grad_norm": 0.9142606241220376, |
|
"learning_rate": 1.0747134715684221e-06, |
|
"loss": 0.0214, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.5765124555160144, |
|
"grad_norm": 1.2294604909040106, |
|
"learning_rate": 1.0660723419185776e-06, |
|
"loss": 0.0434, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.5782918149466192, |
|
"grad_norm": 1.2455489826583874, |
|
"learning_rate": 1.0574619444375017e-06, |
|
"loss": 0.0397, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.5800711743772242, |
|
"grad_norm": 0.7398353995921684, |
|
"learning_rate": 1.0488823463903341e-06, |
|
"loss": 0.0221, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.5818505338078293, |
|
"grad_norm": 1.0370224497854166, |
|
"learning_rate": 1.0403336148016053e-06, |
|
"loss": 0.0352, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.583629893238434, |
|
"grad_norm": 0.9348165169356273, |
|
"learning_rate": 1.0318158164547159e-06, |
|
"loss": 0.0319, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.585409252669039, |
|
"grad_norm": 1.0924414340577266, |
|
"learning_rate": 1.0233290178914096e-06, |
|
"loss": 0.0266, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.5871886120996441, |
|
"grad_norm": 0.9536100341218937, |
|
"learning_rate": 1.014873285411262e-06, |
|
"loss": 0.0306, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.5889679715302492, |
|
"grad_norm": 1.2804237474045312, |
|
"learning_rate": 1.006448685071154e-06, |
|
"loss": 0.0483, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.5907473309608542, |
|
"grad_norm": 1.2570564131314819, |
|
"learning_rate": 9.980552826847635e-07, |
|
"loss": 0.0547, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.592526690391459, |
|
"grad_norm": 1.0068008147398708, |
|
"learning_rate": 9.896931438220453e-07, |
|
"loss": 0.0287, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.594306049822064, |
|
"grad_norm": 1.1044241841144185, |
|
"learning_rate": 9.813623338087181e-07, |
|
"loss": 0.0443, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.596085409252669, |
|
"grad_norm": 1.4269365587701714, |
|
"learning_rate": 9.730629177257623e-07, |
|
"loss": 0.0376, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.5978647686832739, |
|
"grad_norm": 0.9934873275930318, |
|
"learning_rate": 9.64794960408903e-07, |
|
"loss": 0.0288, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.5996441281138791, |
|
"grad_norm": 1.5358973821126876, |
|
"learning_rate": 9.565585264481092e-07, |
|
"loss": 0.0627, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.601423487544484, |
|
"grad_norm": 1.1131303607576086, |
|
"learning_rate": 9.483536801870835e-07, |
|
"loss": 0.0298, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.603202846975089, |
|
"grad_norm": 0.9336168197435843, |
|
"learning_rate": 9.401804857227648e-07, |
|
"loss": 0.0335, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.604982206405694, |
|
"grad_norm": 0.8406252190050231, |
|
"learning_rate": 9.320390069048258e-07, |
|
"loss": 0.0267, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.6067615658362988, |
|
"grad_norm": 1.000464396420974, |
|
"learning_rate": 9.239293073351735e-07, |
|
"loss": 0.0366, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.608540925266904, |
|
"grad_norm": 1.0783875335623725, |
|
"learning_rate": 9.158514503674543e-07, |
|
"loss": 0.0309, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.6103202846975089, |
|
"grad_norm": 0.8032493914104066, |
|
"learning_rate": 9.078054991065532e-07, |
|
"loss": 0.0268, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.612099644128114, |
|
"grad_norm": 1.1398559093924248, |
|
"learning_rate": 8.997915164081095e-07, |
|
"loss": 0.0316, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.613879003558719, |
|
"grad_norm": 1.219756114061941, |
|
"learning_rate": 8.918095648780195e-07, |
|
"loss": 0.0394, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.6156583629893237, |
|
"grad_norm": 1.2443525419299477, |
|
"learning_rate": 8.838597068719518e-07, |
|
"loss": 0.0327, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.6174377224199288, |
|
"grad_norm": 1.0061667871076212, |
|
"learning_rate": 8.75942004494853e-07, |
|
"loss": 0.0305, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.6192170818505338, |
|
"grad_norm": 1.016099535054865, |
|
"learning_rate": 8.680565196004704e-07, |
|
"loss": 0.0273, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.6209964412811388, |
|
"grad_norm": 0.8918550291636964, |
|
"learning_rate": 8.602033137908666e-07, |
|
"loss": 0.0289, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.6227758007117439, |
|
"grad_norm": 1.1020058847219514, |
|
"learning_rate": 8.523824484159348e-07, |
|
"loss": 0.0276, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.6245551601423487, |
|
"grad_norm": 0.9425473680422246, |
|
"learning_rate": 8.445939845729245e-07, |
|
"loss": 0.0297, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.6263345195729537, |
|
"grad_norm": 0.855209514922154, |
|
"learning_rate": 8.368379831059592e-07, |
|
"loss": 0.0285, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.6281138790035588, |
|
"grad_norm": 0.875295007309283, |
|
"learning_rate": 8.29114504605566e-07, |
|
"loss": 0.0312, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.6298932384341636, |
|
"grad_norm": 1.074139716960315, |
|
"learning_rate": 8.21423609408199e-07, |
|
"loss": 0.0296, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.6316725978647688, |
|
"grad_norm": 1.0661503372683163, |
|
"learning_rate": 8.137653575957666e-07, |
|
"loss": 0.033, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.6334519572953736, |
|
"grad_norm": 1.321890008399842, |
|
"learning_rate": 8.061398089951678e-07, |
|
"loss": 0.0508, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.6352313167259787, |
|
"grad_norm": 1.1936725902121832, |
|
"learning_rate": 7.985470231778203e-07, |
|
"loss": 0.0291, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.6370106761565837, |
|
"grad_norm": 1.2543619838127669, |
|
"learning_rate": 7.909870594591951e-07, |
|
"loss": 0.0353, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.6387900355871885, |
|
"grad_norm": 1.2807630998878292, |
|
"learning_rate": 7.834599768983553e-07, |
|
"loss": 0.044, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.6405693950177938, |
|
"grad_norm": 0.8647843192924516, |
|
"learning_rate": 7.759658342974951e-07, |
|
"loss": 0.0297, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.6423487544483986, |
|
"grad_norm": 1.045927631428199, |
|
"learning_rate": 7.685046902014747e-07, |
|
"loss": 0.0339, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.6441281138790036, |
|
"grad_norm": 1.2856227261366628, |
|
"learning_rate": 7.61076602897371e-07, |
|
"loss": 0.0439, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.6459074733096086, |
|
"grad_norm": 0.7181552931762002, |
|
"learning_rate": 7.536816304140177e-07, |
|
"loss": 0.0208, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.6476868327402134, |
|
"grad_norm": 1.1520526870141794, |
|
"learning_rate": 7.46319830521553e-07, |
|
"loss": 0.0312, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.6494661921708185, |
|
"grad_norm": 0.7459327967978033, |
|
"learning_rate": 7.389912607309662e-07, |
|
"loss": 0.0173, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.6512455516014235, |
|
"grad_norm": 1.506110496832405, |
|
"learning_rate": 7.316959782936516e-07, |
|
"loss": 0.0524, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.6530249110320283, |
|
"grad_norm": 0.7670586590573035, |
|
"learning_rate": 7.244340402009608e-07, |
|
"loss": 0.0231, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.6548042704626336, |
|
"grad_norm": 1.2544705947152368, |
|
"learning_rate": 7.172055031837572e-07, |
|
"loss": 0.0339, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.6565836298932384, |
|
"grad_norm": 1.18690949474137, |
|
"learning_rate": 7.100104237119676e-07, |
|
"loss": 0.0427, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.6583629893238434, |
|
"grad_norm": 0.8677239409228932, |
|
"learning_rate": 7.028488579941506e-07, |
|
"loss": 0.0322, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.6601423487544484, |
|
"grad_norm": 1.4058881718421903, |
|
"learning_rate": 6.957208619770505e-07, |
|
"loss": 0.0404, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.6619217081850532, |
|
"grad_norm": 1.052649759973057, |
|
"learning_rate": 6.886264913451635e-07, |
|
"loss": 0.034, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.6637010676156585, |
|
"grad_norm": 1.4589989936482344, |
|
"learning_rate": 6.815658015203014e-07, |
|
"loss": 0.0539, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.6654804270462633, |
|
"grad_norm": 0.9522891072025225, |
|
"learning_rate": 6.745388476611553e-07, |
|
"loss": 0.0272, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.6672597864768683, |
|
"grad_norm": 1.163039240628607, |
|
"learning_rate": 6.67545684662873e-07, |
|
"loss": 0.0355, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.6690391459074734, |
|
"grad_norm": 0.859454878720822, |
|
"learning_rate": 6.605863671566221e-07, |
|
"loss": 0.0323, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.6708185053380782, |
|
"grad_norm": 0.9967503224357394, |
|
"learning_rate": 6.536609495091695e-07, |
|
"loss": 0.0262, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.6725978647686834, |
|
"grad_norm": 1.4826541409157528, |
|
"learning_rate": 6.467694858224488e-07, |
|
"loss": 0.0485, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.6743772241992882, |
|
"grad_norm": 1.2151931700249952, |
|
"learning_rate": 6.399120299331468e-07, |
|
"loss": 0.0407, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.6761565836298933, |
|
"grad_norm": 0.8607645433700678, |
|
"learning_rate": 6.330886354122768e-07, |
|
"loss": 0.0343, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.6779359430604983, |
|
"grad_norm": 0.7860500951563195, |
|
"learning_rate": 6.262993555647617e-07, |
|
"loss": 0.0242, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.6797153024911031, |
|
"grad_norm": 1.0243406173326945, |
|
"learning_rate": 6.1954424342902e-07, |
|
"loss": 0.024, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.6814946619217082, |
|
"grad_norm": 1.0907873097370808, |
|
"learning_rate": 6.128233517765448e-07, |
|
"loss": 0.0352, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.6832740213523132, |
|
"grad_norm": 0.9529001185243109, |
|
"learning_rate": 6.061367331114992e-07, |
|
"loss": 0.0301, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.685053380782918, |
|
"grad_norm": 1.1559262047776806, |
|
"learning_rate": 5.994844396703025e-07, |
|
"loss": 0.0353, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.6868327402135233, |
|
"grad_norm": 0.9253590784085639, |
|
"learning_rate": 5.928665234212233e-07, |
|
"loss": 0.0244, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.688612099644128, |
|
"grad_norm": 0.854850201288866, |
|
"learning_rate": 5.862830360639698e-07, |
|
"loss": 0.0248, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.690391459074733, |
|
"grad_norm": 1.3742981531929324, |
|
"learning_rate": 5.797340290292907e-07, |
|
"loss": 0.0366, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.6921708185053381, |
|
"grad_norm": 0.917796132480759, |
|
"learning_rate": 5.732195534785723e-07, |
|
"loss": 0.0321, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.693950177935943, |
|
"grad_norm": 0.6935753086958434, |
|
"learning_rate": 5.667396603034369e-07, |
|
"loss": 0.0249, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.6957295373665482, |
|
"grad_norm": 1.1247915820923213, |
|
"learning_rate": 5.602944001253486e-07, |
|
"loss": 0.034, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.697508896797153, |
|
"grad_norm": 1.1174616486574385, |
|
"learning_rate": 5.538838232952104e-07, |
|
"loss": 0.0328, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.699288256227758, |
|
"grad_norm": 1.35393002215958, |
|
"learning_rate": 5.475079798929816e-07, |
|
"loss": 0.0384, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.701067615658363, |
|
"grad_norm": 0.7691879008353513, |
|
"learning_rate": 5.411669197272795e-07, |
|
"loss": 0.0183, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.7028469750889679, |
|
"grad_norm": 1.0981936797185596, |
|
"learning_rate": 5.348606923349903e-07, |
|
"loss": 0.0283, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.704626334519573, |
|
"grad_norm": 0.8560174445858693, |
|
"learning_rate": 5.285893469808855e-07, |
|
"loss": 0.0232, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.706405693950178, |
|
"grad_norm": 0.977437018531457, |
|
"learning_rate": 5.223529326572352e-07, |
|
"loss": 0.0315, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.708185053380783, |
|
"grad_norm": 1.0238486681977492, |
|
"learning_rate": 5.161514980834232e-07, |
|
"loss": 0.0319, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.709964412811388, |
|
"grad_norm": 1.028593454460307, |
|
"learning_rate": 5.099850917055709e-07, |
|
"loss": 0.0296, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.7117437722419928, |
|
"grad_norm": 1.197853331879787, |
|
"learning_rate": 5.038537616961559e-07, |
|
"loss": 0.0403, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.7135231316725978, |
|
"grad_norm": 1.0281194819075867, |
|
"learning_rate": 4.977575559536358e-07, |
|
"loss": 0.0327, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.7153024911032029, |
|
"grad_norm": 1.0202589732465308, |
|
"learning_rate": 4.916965221020753e-07, |
|
"loss": 0.0258, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.7170818505338077, |
|
"grad_norm": 0.9742522505557961, |
|
"learning_rate": 4.856707074907729e-07, |
|
"loss": 0.0301, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.718861209964413, |
|
"grad_norm": 1.6855311169508895, |
|
"learning_rate": 4.796801591938922e-07, |
|
"loss": 0.0666, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.7206405693950177, |
|
"grad_norm": 1.1342278579742222, |
|
"learning_rate": 4.737249240100911e-07, |
|
"loss": 0.027, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.7224199288256228, |
|
"grad_norm": 1.2947935715011978, |
|
"learning_rate": 4.6780504846216155e-07, |
|
"loss": 0.0392, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.7241992882562278, |
|
"grad_norm": 0.8423666826850359, |
|
"learning_rate": 4.619205787966613e-07, |
|
"loss": 0.0219, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.7259786476868326, |
|
"grad_norm": 0.8356251229618374, |
|
"learning_rate": 4.560715609835548e-07, |
|
"loss": 0.0295, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.7277580071174379, |
|
"grad_norm": 0.975413555854705, |
|
"learning_rate": 4.5025804071585464e-07, |
|
"loss": 0.031, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.7295373665480427, |
|
"grad_norm": 1.7103144683383302, |
|
"learning_rate": 4.4448006340926163e-07, |
|
"loss": 0.0756, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.7313167259786477, |
|
"grad_norm": 1.1397574851612526, |
|
"learning_rate": 4.3873767420181344e-07, |
|
"loss": 0.0383, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.7330960854092528, |
|
"grad_norm": 1.258638233810894, |
|
"learning_rate": 4.3303091795353024e-07, |
|
"loss": 0.0494, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.7348754448398576, |
|
"grad_norm": 1.166367648099526, |
|
"learning_rate": 4.2735983924606596e-07, |
|
"loss": 0.0355, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.7366548042704626, |
|
"grad_norm": 0.9944116877522787, |
|
"learning_rate": 4.2172448238235464e-07, |
|
"loss": 0.0248, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.7384341637010676, |
|
"grad_norm": 1.814490357944255, |
|
"learning_rate": 4.161248913862731e-07, |
|
"loss": 0.0507, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.7402135231316724, |
|
"grad_norm": 1.293279048974226, |
|
"learning_rate": 4.1056111000228937e-07, |
|
"loss": 0.0383, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.7419928825622777, |
|
"grad_norm": 1.2295224161963847, |
|
"learning_rate": 4.0503318169512417e-07, |
|
"loss": 0.0432, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.7437722419928825, |
|
"grad_norm": 0.8857506366282123, |
|
"learning_rate": 3.9954114964941336e-07, |
|
"loss": 0.0214, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.7455516014234875, |
|
"grad_norm": 1.266266943648353, |
|
"learning_rate": 3.9408505676936327e-07, |
|
"loss": 0.047, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.7473309608540926, |
|
"grad_norm": 1.0492016154764805, |
|
"learning_rate": 3.886649456784253e-07, |
|
"loss": 0.0309, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.7491103202846974, |
|
"grad_norm": 0.8773806773897487, |
|
"learning_rate": 3.8328085871895624e-07, |
|
"loss": 0.0256, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.7508896797153026, |
|
"grad_norm": 0.9008389846529217, |
|
"learning_rate": 3.779328379518898e-07, |
|
"loss": 0.0301, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.7526690391459074, |
|
"grad_norm": 1.0180931238883257, |
|
"learning_rate": 3.7262092515640556e-07, |
|
"loss": 0.0344, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.7544483985765125, |
|
"grad_norm": 0.9014332383947364, |
|
"learning_rate": 3.673451618296081e-07, |
|
"loss": 0.033, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.7562277580071175, |
|
"grad_norm": 1.0943685208484006, |
|
"learning_rate": 3.621055891861963e-07, |
|
"loss": 0.0387, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.7580071174377223, |
|
"grad_norm": 0.9670358588722275, |
|
"learning_rate": 3.56902248158148e-07, |
|
"loss": 0.0301, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.7597864768683276, |
|
"grad_norm": 0.8586037467588032, |
|
"learning_rate": 3.517351793943913e-07, |
|
"loss": 0.0189, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.7615658362989324, |
|
"grad_norm": 0.9853627154020785, |
|
"learning_rate": 3.4660442326049704e-07, |
|
"loss": 0.026, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.7633451957295374, |
|
"grad_norm": 1.0460384481622562, |
|
"learning_rate": 3.4151001983835696e-07, |
|
"loss": 0.0288, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.7651245551601424, |
|
"grad_norm": 1.155688394879773, |
|
"learning_rate": 3.364520089258727e-07, |
|
"loss": 0.0345, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.7669039145907472, |
|
"grad_norm": 1.2170330035202128, |
|
"learning_rate": 3.314304300366461e-07, |
|
"loss": 0.0389, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.7686832740213523, |
|
"grad_norm": 0.9774165423185093, |
|
"learning_rate": 3.2644532239966444e-07, |
|
"loss": 0.0314, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.7704626334519573, |
|
"grad_norm": 0.8024228200182462, |
|
"learning_rate": 3.2149672495900286e-07, |
|
"loss": 0.0228, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.7722419928825621, |
|
"grad_norm": 0.9169374130258652, |
|
"learning_rate": 3.165846763735153e-07, |
|
"loss": 0.0313, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.7740213523131674, |
|
"grad_norm": 1.1259893864205517, |
|
"learning_rate": 3.117092150165324e-07, |
|
"loss": 0.0369, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.7758007117437722, |
|
"grad_norm": 1.0091694614427942, |
|
"learning_rate": 3.068703789755606e-07, |
|
"loss": 0.0258, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.7775800711743772, |
|
"grad_norm": 1.3271059285956883, |
|
"learning_rate": 3.020682060519886e-07, |
|
"loss": 0.0328, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.7793594306049823, |
|
"grad_norm": 1.1325727530252554, |
|
"learning_rate": 2.9730273376078923e-07, |
|
"loss": 0.0421, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7793594306049823, |
|
"eval_loss": 0.0947481095790863, |
|
"eval_runtime": 7.1085, |
|
"eval_samples_per_second": 6.471, |
|
"eval_steps_per_second": 1.688, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.781138790035587, |
|
"grad_norm": 1.9072065524139952, |
|
"learning_rate": 2.9257399933022737e-07, |
|
"loss": 0.0609, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 1.7829181494661923, |
|
"grad_norm": 1.5340153928789177, |
|
"learning_rate": 2.8788203970156805e-07, |
|
"loss": 0.0285, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.7846975088967971, |
|
"grad_norm": 0.9736606516974632, |
|
"learning_rate": 2.832268915287878e-07, |
|
"loss": 0.0322, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 1.7864768683274022, |
|
"grad_norm": 1.2845746239870501, |
|
"learning_rate": 2.7860859117828985e-07, |
|
"loss": 0.0411, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.7882562277580072, |
|
"grad_norm": 1.5114841837530466, |
|
"learning_rate": 2.740271747286194e-07, |
|
"loss": 0.0365, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.790035587188612, |
|
"grad_norm": 1.0805661038226546, |
|
"learning_rate": 2.6948267797018145e-07, |
|
"loss": 0.0322, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.791814946619217, |
|
"grad_norm": 1.1622087715052343, |
|
"learning_rate": 2.649751364049613e-07, |
|
"loss": 0.0309, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 1.793594306049822, |
|
"grad_norm": 0.8952136711456726, |
|
"learning_rate": 2.6050458524624735e-07, |
|
"loss": 0.024, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.795373665480427, |
|
"grad_norm": 1.0984221139826034, |
|
"learning_rate": 2.560710594183552e-07, |
|
"loss": 0.031, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 1.7971530249110321, |
|
"grad_norm": 1.2052617116658841, |
|
"learning_rate": 2.5167459355635524e-07, |
|
"loss": 0.0461, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.798932384341637, |
|
"grad_norm": 1.2451605519007105, |
|
"learning_rate": 2.473152220058039e-07, |
|
"loss": 0.0327, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 1.800711743772242, |
|
"grad_norm": 1.0244966281303112, |
|
"learning_rate": 2.429929788224722e-07, |
|
"loss": 0.0261, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.802491103202847, |
|
"grad_norm": 1.3171063967437255, |
|
"learning_rate": 2.38707897772083e-07, |
|
"loss": 0.0482, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 1.8042704626334518, |
|
"grad_norm": 1.3502749238085892, |
|
"learning_rate": 2.3446001233004333e-07, |
|
"loss": 0.0549, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.806049822064057, |
|
"grad_norm": 0.8542766947050513, |
|
"learning_rate": 2.3024935568118745e-07, |
|
"loss": 0.0235, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.8078291814946619, |
|
"grad_norm": 0.9972891613288003, |
|
"learning_rate": 2.2607596071951288e-07, |
|
"loss": 0.0383, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.809608540925267, |
|
"grad_norm": 0.7721878506350839, |
|
"learning_rate": 2.2193986004792667e-07, |
|
"loss": 0.022, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 1.811387900355872, |
|
"grad_norm": 0.690046088755805, |
|
"learning_rate": 2.1784108597799058e-07, |
|
"loss": 0.0206, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.8131672597864767, |
|
"grad_norm": 0.9469380483577621, |
|
"learning_rate": 2.1377967052966685e-07, |
|
"loss": 0.0231, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 1.814946619217082, |
|
"grad_norm": 0.9143973488830226, |
|
"learning_rate": 2.0975564543107007e-07, |
|
"loss": 0.025, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.8167259786476868, |
|
"grad_norm": 1.136773591207416, |
|
"learning_rate": 2.057690421182168e-07, |
|
"loss": 0.0352, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 1.8185053380782918, |
|
"grad_norm": 0.9493464713193047, |
|
"learning_rate": 2.01819891734783e-07, |
|
"loss": 0.0298, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 1.8202846975088969, |
|
"grad_norm": 1.1507856621284378, |
|
"learning_rate": 1.979082251318576e-07, |
|
"loss": 0.0357, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 1.8220640569395017, |
|
"grad_norm": 0.9336238308160726, |
|
"learning_rate": 1.9403407286770592e-07, |
|
"loss": 0.0243, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.8238434163701067, |
|
"grad_norm": 1.0948397419647138, |
|
"learning_rate": 1.9019746520752502e-07, |
|
"loss": 0.0395, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.8256227758007118, |
|
"grad_norm": 0.8288584020861612, |
|
"learning_rate": 1.8639843212321206e-07, |
|
"loss": 0.0232, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 1.8274021352313166, |
|
"grad_norm": 0.9203693465576154, |
|
"learning_rate": 1.826370032931285e-07, |
|
"loss": 0.0238, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 1.8291814946619218, |
|
"grad_norm": 0.8939548723704083, |
|
"learning_rate": 1.789132081018674e-07, |
|
"loss": 0.0229, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 1.8309608540925266, |
|
"grad_norm": 0.9572900640183559, |
|
"learning_rate": 1.7522707564002706e-07, |
|
"loss": 0.0335, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 1.8327402135231317, |
|
"grad_norm": 0.9964016695825068, |
|
"learning_rate": 1.7157863470397718e-07, |
|
"loss": 0.0298, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.8345195729537367, |
|
"grad_norm": 1.0293981459413142, |
|
"learning_rate": 1.6796791379564138e-07, |
|
"loss": 0.0272, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 1.8362989323843415, |
|
"grad_norm": 0.770019818836542, |
|
"learning_rate": 1.6439494112227173e-07, |
|
"loss": 0.0186, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 1.8380782918149468, |
|
"grad_norm": 1.1058177844050654, |
|
"learning_rate": 1.6085974459622567e-07, |
|
"loss": 0.0327, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 1.8398576512455516, |
|
"grad_norm": 0.7505988407459522, |
|
"learning_rate": 1.573623518347517e-07, |
|
"loss": 0.0205, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 1.8416370106761566, |
|
"grad_norm": 0.8720094685191757, |
|
"learning_rate": 1.5390279015977117e-07, |
|
"loss": 0.025, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.8434163701067616, |
|
"grad_norm": 0.8715201752041221, |
|
"learning_rate": 1.5048108659766693e-07, |
|
"loss": 0.0339, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.8451957295373664, |
|
"grad_norm": 0.8375440018979823, |
|
"learning_rate": 1.470972678790711e-07, |
|
"loss": 0.0293, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 1.8469750889679717, |
|
"grad_norm": 1.0399293239440708, |
|
"learning_rate": 1.437513604386559e-07, |
|
"loss": 0.0426, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 1.8487544483985765, |
|
"grad_norm": 1.1230274662616628, |
|
"learning_rate": 1.404433904149266e-07, |
|
"loss": 0.0426, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 1.8505338078291815, |
|
"grad_norm": 1.0490680380884723, |
|
"learning_rate": 1.3717338365001943e-07, |
|
"loss": 0.0318, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.8523131672597866, |
|
"grad_norm": 1.7758214643355517, |
|
"learning_rate": 1.3394136568949834e-07, |
|
"loss": 0.0609, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 1.8540925266903914, |
|
"grad_norm": 1.037257983103503, |
|
"learning_rate": 1.307473617821553e-07, |
|
"loss": 0.0339, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 1.8558718861209964, |
|
"grad_norm": 1.2825870345177348, |
|
"learning_rate": 1.275913968798137e-07, |
|
"loss": 0.0389, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 1.8576512455516014, |
|
"grad_norm": 0.7846351465000317, |
|
"learning_rate": 1.2447349563713186e-07, |
|
"loss": 0.0253, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 1.8594306049822062, |
|
"grad_norm": 1.0036734986062918, |
|
"learning_rate": 1.213936824114137e-07, |
|
"loss": 0.027, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.8612099644128115, |
|
"grad_norm": 0.9539567942293432, |
|
"learning_rate": 1.1835198126241509e-07, |
|
"loss": 0.0247, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 1.8629893238434163, |
|
"grad_norm": 1.7486741623792832, |
|
"learning_rate": 1.1534841595215617e-07, |
|
"loss": 0.073, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 1.8647686832740213, |
|
"grad_norm": 0.9893266899850512, |
|
"learning_rate": 1.1238300994473983e-07, |
|
"loss": 0.0318, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 1.8665480427046264, |
|
"grad_norm": 1.0343052675967725, |
|
"learning_rate": 1.0945578640616183e-07, |
|
"loss": 0.0389, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 1.8683274021352312, |
|
"grad_norm": 1.1323746727629953, |
|
"learning_rate": 1.0656676820413603e-07, |
|
"loss": 0.036, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.8701067615658364, |
|
"grad_norm": 0.8882117071590495, |
|
"learning_rate": 1.0371597790791166e-07, |
|
"loss": 0.0244, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 1.8718861209964412, |
|
"grad_norm": 1.4054278760982788, |
|
"learning_rate": 1.0090343778809908e-07, |
|
"loss": 0.055, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 1.8736654804270463, |
|
"grad_norm": 1.1718399227929586, |
|
"learning_rate": 9.812916981649433e-08, |
|
"loss": 0.0325, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 1.8754448398576513, |
|
"grad_norm": 1.0136109192463043, |
|
"learning_rate": 9.539319566590766e-08, |
|
"loss": 0.0331, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 1.8772241992882561, |
|
"grad_norm": 0.9207458489091273, |
|
"learning_rate": 9.269553670999743e-08, |
|
"loss": 0.0273, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.8790035587188612, |
|
"grad_norm": 1.8832552035408634, |
|
"learning_rate": 9.003621402309815e-08, |
|
"loss": 0.0512, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 1.8807829181494662, |
|
"grad_norm": 1.1085209502881117, |
|
"learning_rate": 8.741524838005888e-08, |
|
"loss": 0.0371, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.8825622775800712, |
|
"grad_norm": 1.0304874876849346, |
|
"learning_rate": 8.483266025608061e-08, |
|
"loss": 0.0333, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 1.8843416370106763, |
|
"grad_norm": 1.124313793866581, |
|
"learning_rate": 8.228846982655525e-08, |
|
"loss": 0.0362, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 1.886120996441281, |
|
"grad_norm": 1.0371647076269432, |
|
"learning_rate": 7.978269696691021e-08, |
|
"loss": 0.0313, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.887900355871886, |
|
"grad_norm": 0.8781623744256626, |
|
"learning_rate": 7.731536125244965e-08, |
|
"loss": 0.0263, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 1.8896797153024911, |
|
"grad_norm": 1.0887897624505116, |
|
"learning_rate": 7.488648195820513e-08, |
|
"loss": 0.0306, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 1.891459074733096, |
|
"grad_norm": 1.1709776593421617, |
|
"learning_rate": 7.249607805878245e-08, |
|
"loss": 0.0313, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 1.8932384341637012, |
|
"grad_norm": 1.0853574192827793, |
|
"learning_rate": 7.014416822821557e-08, |
|
"loss": 0.0398, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 1.895017793594306, |
|
"grad_norm": 0.9397178497016614, |
|
"learning_rate": 6.783077083981793e-08, |
|
"loss": 0.0317, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.896797153024911, |
|
"grad_norm": 1.0197141783305712, |
|
"learning_rate": 6.55559039660425e-08, |
|
"loss": 0.0336, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 1.898576512455516, |
|
"grad_norm": 1.0714163716494174, |
|
"learning_rate": 6.331958537833693e-08, |
|
"loss": 0.0314, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 1.9003558718861209, |
|
"grad_norm": 1.1122352022231587, |
|
"learning_rate": 6.112183254700866e-08, |
|
"loss": 0.0419, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 1.9021352313167261, |
|
"grad_norm": 1.0897904640754559, |
|
"learning_rate": 5.8962662641083856e-08, |
|
"loss": 0.0253, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 1.903914590747331, |
|
"grad_norm": 0.8019834775667739, |
|
"learning_rate": 5.6842092528176516e-08, |
|
"loss": 0.0208, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.905693950177936, |
|
"grad_norm": 1.0154927628209092, |
|
"learning_rate": 5.476013877435626e-08, |
|
"loss": 0.0327, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 1.907473309608541, |
|
"grad_norm": 1.1492019861511986, |
|
"learning_rate": 5.271681764401848e-08, |
|
"loss": 0.0349, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 1.9092526690391458, |
|
"grad_norm": 0.6066967696895387, |
|
"learning_rate": 5.071214509975775e-08, |
|
"loss": 0.0145, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 1.9110320284697508, |
|
"grad_norm": 0.7974773419443782, |
|
"learning_rate": 4.8746136802240716e-08, |
|
"loss": 0.0237, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 1.9128113879003559, |
|
"grad_norm": 0.9581788101872629, |
|
"learning_rate": 4.6818808110087875e-08, |
|
"loss": 0.0224, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.914590747330961, |
|
"grad_norm": 1.5404896446186378, |
|
"learning_rate": 4.493017407975087e-08, |
|
"loss": 0.0437, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.916370106761566, |
|
"grad_norm": 1.1397782238434406, |
|
"learning_rate": 4.308024946539424e-08, |
|
"loss": 0.0342, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 1.9181494661921707, |
|
"grad_norm": 1.1250767329110418, |
|
"learning_rate": 4.1269048718783344e-08, |
|
"loss": 0.0337, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 1.9199288256227758, |
|
"grad_norm": 0.880544157121548, |
|
"learning_rate": 3.9496585989167726e-08, |
|
"loss": 0.021, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 1.9217081850533808, |
|
"grad_norm": 0.8180360840119028, |
|
"learning_rate": 3.776287512317345e-08, |
|
"loss": 0.0261, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.9234875444839856, |
|
"grad_norm": 0.8005088603801955, |
|
"learning_rate": 3.606792966469375e-08, |
|
"loss": 0.0175, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 1.9252669039145909, |
|
"grad_norm": 0.8504609540288297, |
|
"learning_rate": 3.4411762854782426e-08, |
|
"loss": 0.0257, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 1.9270462633451957, |
|
"grad_norm": 1.3257663312010712, |
|
"learning_rate": 3.279438763155174e-08, |
|
"loss": 0.0318, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 1.9288256227758007, |
|
"grad_norm": 1.4229351345768066, |
|
"learning_rate": 3.121581663007134e-08, |
|
"loss": 0.0342, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.9306049822064058, |
|
"grad_norm": 1.1589920439469523, |
|
"learning_rate": 2.967606218226837e-08, |
|
"loss": 0.0387, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.9323843416370106, |
|
"grad_norm": 0.9002948408342034, |
|
"learning_rate": 2.8175136316832e-08, |
|
"loss": 0.0326, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 1.9341637010676158, |
|
"grad_norm": 0.8120421236879184, |
|
"learning_rate": 2.6713050759120117e-08, |
|
"loss": 0.0181, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 1.9359430604982206, |
|
"grad_norm": 1.0256189774573095, |
|
"learning_rate": 2.528981693106558e-08, |
|
"loss": 0.0275, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.9377224199288257, |
|
"grad_norm": 1.1319321569982366, |
|
"learning_rate": 2.3905445951089013e-08, |
|
"loss": 0.0412, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 1.9395017793594307, |
|
"grad_norm": 1.2319857019056573, |
|
"learning_rate": 2.2559948634011673e-08, |
|
"loss": 0.0327, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.9412811387900355, |
|
"grad_norm": 1.4380195031671035, |
|
"learning_rate": 2.125333549096942e-08, |
|
"loss": 0.053, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 1.9430604982206405, |
|
"grad_norm": 1.1169770627370423, |
|
"learning_rate": 1.9985616729332747e-08, |
|
"loss": 0.0385, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.9448398576512456, |
|
"grad_norm": 1.6080668287951028, |
|
"learning_rate": 1.8756802252625773e-08, |
|
"loss": 0.0582, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 1.9466192170818504, |
|
"grad_norm": 0.981558355929053, |
|
"learning_rate": 1.75669016604485e-08, |
|
"loss": 0.0348, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 1.9483985765124556, |
|
"grad_norm": 0.8736381382440352, |
|
"learning_rate": 1.6415924248403547e-08, |
|
"loss": 0.0238, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.9501779359430604, |
|
"grad_norm": 0.8667973051305249, |
|
"learning_rate": 1.5303879008021773e-08, |
|
"loss": 0.0293, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 1.9519572953736655, |
|
"grad_norm": 1.1224352968313376, |
|
"learning_rate": 1.4230774626691756e-08, |
|
"loss": 0.0271, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 1.9537366548042705, |
|
"grad_norm": 0.8636205703395909, |
|
"learning_rate": 1.3196619487594875e-08, |
|
"loss": 0.0234, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 1.9555160142348753, |
|
"grad_norm": 1.1909479653464772, |
|
"learning_rate": 1.2201421669636448e-08, |
|
"loss": 0.0438, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 1.9572953736654806, |
|
"grad_norm": 1.2745212682434741, |
|
"learning_rate": 1.1245188947384133e-08, |
|
"loss": 0.0371, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.9590747330960854, |
|
"grad_norm": 1.4511635164005035, |
|
"learning_rate": 1.0327928791006858e-08, |
|
"loss": 0.0219, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 1.9608540925266904, |
|
"grad_norm": 0.9937233221884019, |
|
"learning_rate": 9.449648366217645e-09, |
|
"loss": 0.0352, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 1.9626334519572954, |
|
"grad_norm": 1.1568564863571904, |
|
"learning_rate": 8.61035453421588e-09, |
|
"loss": 0.0373, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 1.9644128113879002, |
|
"grad_norm": 0.9725300009720409, |
|
"learning_rate": 7.81005385163458e-09, |
|
"loss": 0.0292, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 1.9661921708185055, |
|
"grad_norm": 0.7344268986125123, |
|
"learning_rate": 7.048752570488205e-09, |
|
"loss": 0.0226, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.9679715302491103, |
|
"grad_norm": 1.1103254711241581, |
|
"learning_rate": 6.326456638125478e-09, |
|
"loss": 0.0329, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 1.9697508896797153, |
|
"grad_norm": 0.7507227548888199, |
|
"learning_rate": 5.643171697183314e-09, |
|
"loss": 0.0207, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 1.9715302491103204, |
|
"grad_norm": 1.219712949959398, |
|
"learning_rate": 4.998903085539075e-09, |
|
"loss": 0.0409, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.9733096085409252, |
|
"grad_norm": 0.9785738143064482, |
|
"learning_rate": 4.393655836272825e-09, |
|
"loss": 0.0325, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 1.9750889679715302, |
|
"grad_norm": 0.974601159460913, |
|
"learning_rate": 3.8274346776262514e-09, |
|
"loss": 0.0331, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.9768683274021353, |
|
"grad_norm": 1.195432594721749, |
|
"learning_rate": 3.300244032966582e-09, |
|
"loss": 0.0272, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 1.97864768683274, |
|
"grad_norm": 0.8922492478187152, |
|
"learning_rate": 2.8120880207493928e-09, |
|
"loss": 0.0195, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 1.9804270462633453, |
|
"grad_norm": 1.0119408512313461, |
|
"learning_rate": 2.362970454491409e-09, |
|
"loss": 0.0233, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 1.9822064056939501, |
|
"grad_norm": 1.2305070982797535, |
|
"learning_rate": 1.952894842735531e-09, |
|
"loss": 0.0328, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 1.9839857651245552, |
|
"grad_norm": 0.7722841339952425, |
|
"learning_rate": 1.5818643890258555e-09, |
|
"loss": 0.0242, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.9857651245551602, |
|
"grad_norm": 1.1607913508838577, |
|
"learning_rate": 1.2498819918843609e-09, |
|
"loss": 0.0321, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 1.987544483985765, |
|
"grad_norm": 1.077423030743801, |
|
"learning_rate": 9.569502447837053e-10, |
|
"loss": 0.0314, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 1.9893238434163703, |
|
"grad_norm": 1.167724159533307, |
|
"learning_rate": 7.03071436131686e-10, |
|
"loss": 0.0403, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 1.991103202846975, |
|
"grad_norm": 0.8391015329772153, |
|
"learning_rate": 4.882475492506977e-10, |
|
"loss": 0.0293, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 1.99288256227758, |
|
"grad_norm": 2.4583348853791893, |
|
"learning_rate": 3.124802623627465e-10, |
|
"loss": 0.0661, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.9946619217081851, |
|
"grad_norm": 1.0657061538812194, |
|
"learning_rate": 1.7577094857557097e-10, |
|
"loss": 0.033, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 1.99644128113879, |
|
"grad_norm": 1.1647030062949417, |
|
"learning_rate": 7.812067587487093e-11, |
|
"loss": 0.0338, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 1.998220640569395, |
|
"grad_norm": 0.815018644425788, |
|
"learning_rate": 1.9530207111539967e-11, |
|
"loss": 0.0207, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.8327208648994265, |
|
"learning_rate": 0.0, |
|
"loss": 0.0215, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1124, |
|
"total_flos": 10614712614912.0, |
|
"train_loss": 0.07053656412911638, |
|
"train_runtime": 2529.1287, |
|
"train_samples_per_second": 3.553, |
|
"train_steps_per_second": 0.444 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1124, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 10614712614912.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|