|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 0, |
|
"global_step": 511, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0019569471624266144, |
|
"grad_norm": 1.746453046798706, |
|
"learning_rate": 9.980430528375734e-06, |
|
"loss": 1.5399, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.003913894324853229, |
|
"grad_norm": 0.8929949998855591, |
|
"learning_rate": 9.960861056751468e-06, |
|
"loss": 1.4639, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.005870841487279843, |
|
"grad_norm": 0.7871854305267334, |
|
"learning_rate": 9.941291585127202e-06, |
|
"loss": 1.3791, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.007827788649706457, |
|
"grad_norm": 1.535941243171692, |
|
"learning_rate": 9.921722113502935e-06, |
|
"loss": 1.5319, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.009784735812133072, |
|
"grad_norm": 0.8197427988052368, |
|
"learning_rate": 9.902152641878669e-06, |
|
"loss": 1.5193, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.011741682974559686, |
|
"grad_norm": 0.7342958450317383, |
|
"learning_rate": 9.882583170254404e-06, |
|
"loss": 1.3804, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0136986301369863, |
|
"grad_norm": 0.6128910183906555, |
|
"learning_rate": 9.863013698630138e-06, |
|
"loss": 1.3007, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.015655577299412915, |
|
"grad_norm": 0.6029248833656311, |
|
"learning_rate": 9.843444227005872e-06, |
|
"loss": 1.3657, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01761252446183953, |
|
"grad_norm": 0.5792056322097778, |
|
"learning_rate": 9.823874755381605e-06, |
|
"loss": 1.4067, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.019569471624266144, |
|
"grad_norm": 0.6559755206108093, |
|
"learning_rate": 9.804305283757339e-06, |
|
"loss": 1.4272, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.021526418786692758, |
|
"grad_norm": 0.5778592228889465, |
|
"learning_rate": 9.784735812133073e-06, |
|
"loss": 1.4143, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.023483365949119372, |
|
"grad_norm": 0.5314830541610718, |
|
"learning_rate": 9.765166340508806e-06, |
|
"loss": 1.3531, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.025440313111545987, |
|
"grad_norm": 0.49222293496131897, |
|
"learning_rate": 9.74559686888454e-06, |
|
"loss": 1.4155, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0273972602739726, |
|
"grad_norm": 0.9724328517913818, |
|
"learning_rate": 9.726027397260275e-06, |
|
"loss": 1.2311, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.029354207436399216, |
|
"grad_norm": 0.488921582698822, |
|
"learning_rate": 9.706457925636007e-06, |
|
"loss": 1.3007, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03131115459882583, |
|
"grad_norm": 0.49755173921585083, |
|
"learning_rate": 9.686888454011743e-06, |
|
"loss": 1.2708, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.033268101761252444, |
|
"grad_norm": 0.49553531408309937, |
|
"learning_rate": 9.667318982387476e-06, |
|
"loss": 1.3346, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03522504892367906, |
|
"grad_norm": 0.40435364842414856, |
|
"learning_rate": 9.64774951076321e-06, |
|
"loss": 1.2333, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03718199608610567, |
|
"grad_norm": 0.46682047843933105, |
|
"learning_rate": 9.628180039138944e-06, |
|
"loss": 1.2733, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03913894324853229, |
|
"grad_norm": 0.4170684218406677, |
|
"learning_rate": 9.608610567514677e-06, |
|
"loss": 1.2262, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0410958904109589, |
|
"grad_norm": 0.4080331027507782, |
|
"learning_rate": 9.589041095890411e-06, |
|
"loss": 1.1896, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.043052837573385516, |
|
"grad_norm": 0.3215958774089813, |
|
"learning_rate": 9.569471624266146e-06, |
|
"loss": 1.2084, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04500978473581213, |
|
"grad_norm": 0.34072086215019226, |
|
"learning_rate": 9.549902152641878e-06, |
|
"loss": 1.2204, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.046966731898238745, |
|
"grad_norm": 0.34246671199798584, |
|
"learning_rate": 9.530332681017614e-06, |
|
"loss": 1.2544, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.04892367906066536, |
|
"grad_norm": 0.31154486536979675, |
|
"learning_rate": 9.510763209393347e-06, |
|
"loss": 1.2244, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.050880626223091974, |
|
"grad_norm": 0.29958635568618774, |
|
"learning_rate": 9.49119373776908e-06, |
|
"loss": 1.1961, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.05283757338551859, |
|
"grad_norm": 0.3322153687477112, |
|
"learning_rate": 9.471624266144814e-06, |
|
"loss": 1.1918, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0547945205479452, |
|
"grad_norm": 0.30765673518180847, |
|
"learning_rate": 9.452054794520548e-06, |
|
"loss": 1.2193, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.05675146771037182, |
|
"grad_norm": 0.3587987720966339, |
|
"learning_rate": 9.432485322896282e-06, |
|
"loss": 1.2119, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.05870841487279843, |
|
"grad_norm": 0.3882049024105072, |
|
"learning_rate": 9.412915851272017e-06, |
|
"loss": 1.2632, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.060665362035225046, |
|
"grad_norm": 0.31729480624198914, |
|
"learning_rate": 9.393346379647749e-06, |
|
"loss": 1.1768, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.06262230919765166, |
|
"grad_norm": 0.32497256994247437, |
|
"learning_rate": 9.373776908023484e-06, |
|
"loss": 1.1603, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.06457925636007827, |
|
"grad_norm": 0.3473309576511383, |
|
"learning_rate": 9.354207436399218e-06, |
|
"loss": 1.2131, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.06653620352250489, |
|
"grad_norm": 0.35079726576805115, |
|
"learning_rate": 9.334637964774952e-06, |
|
"loss": 1.1437, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0684931506849315, |
|
"grad_norm": 0.33893632888793945, |
|
"learning_rate": 9.315068493150685e-06, |
|
"loss": 1.1662, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07045009784735812, |
|
"grad_norm": 0.2970063090324402, |
|
"learning_rate": 9.295499021526419e-06, |
|
"loss": 1.1354, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07240704500978473, |
|
"grad_norm": 0.29775238037109375, |
|
"learning_rate": 9.275929549902153e-06, |
|
"loss": 1.1152, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.07436399217221135, |
|
"grad_norm": 0.28359663486480713, |
|
"learning_rate": 9.256360078277888e-06, |
|
"loss": 1.1677, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.07632093933463796, |
|
"grad_norm": 0.45086753368377686, |
|
"learning_rate": 9.23679060665362e-06, |
|
"loss": 1.1375, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.07827788649706457, |
|
"grad_norm": 0.3175908029079437, |
|
"learning_rate": 9.217221135029355e-06, |
|
"loss": 1.1484, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08023483365949119, |
|
"grad_norm": 0.28813987970352173, |
|
"learning_rate": 9.197651663405089e-06, |
|
"loss": 1.1496, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0821917808219178, |
|
"grad_norm": 0.43875834345817566, |
|
"learning_rate": 9.178082191780823e-06, |
|
"loss": 1.1561, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.08414872798434442, |
|
"grad_norm": 0.29168814420700073, |
|
"learning_rate": 9.158512720156556e-06, |
|
"loss": 1.123, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.08610567514677103, |
|
"grad_norm": 0.2780202627182007, |
|
"learning_rate": 9.13894324853229e-06, |
|
"loss": 1.0861, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.08806262230919765, |
|
"grad_norm": 0.31432074308395386, |
|
"learning_rate": 9.119373776908024e-06, |
|
"loss": 1.1191, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09001956947162426, |
|
"grad_norm": 0.31427478790283203, |
|
"learning_rate": 9.099804305283759e-06, |
|
"loss": 1.1696, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.09197651663405088, |
|
"grad_norm": 0.3010156452655792, |
|
"learning_rate": 9.080234833659491e-06, |
|
"loss": 1.1092, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.09393346379647749, |
|
"grad_norm": 0.3034595549106598, |
|
"learning_rate": 9.060665362035226e-06, |
|
"loss": 1.1503, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0958904109589041, |
|
"grad_norm": 0.29212486743927, |
|
"learning_rate": 9.04109589041096e-06, |
|
"loss": 1.063, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.09784735812133072, |
|
"grad_norm": 0.3151186406612396, |
|
"learning_rate": 9.021526418786694e-06, |
|
"loss": 1.1331, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09980430528375733, |
|
"grad_norm": 0.31028345227241516, |
|
"learning_rate": 9.001956947162427e-06, |
|
"loss": 1.082, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.10176125244618395, |
|
"grad_norm": 0.2922515869140625, |
|
"learning_rate": 8.982387475538161e-06, |
|
"loss": 1.1151, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.10371819960861056, |
|
"grad_norm": 0.3567720651626587, |
|
"learning_rate": 8.962818003913895e-06, |
|
"loss": 1.075, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.10567514677103718, |
|
"grad_norm": 0.2653373181819916, |
|
"learning_rate": 8.943248532289628e-06, |
|
"loss": 1.1201, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.10763209393346379, |
|
"grad_norm": 0.301695317029953, |
|
"learning_rate": 8.923679060665362e-06, |
|
"loss": 1.0929, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1095890410958904, |
|
"grad_norm": 0.407412052154541, |
|
"learning_rate": 8.904109589041097e-06, |
|
"loss": 1.0487, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.11154598825831702, |
|
"grad_norm": 0.2796148657798767, |
|
"learning_rate": 8.88454011741683e-06, |
|
"loss": 1.0566, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.11350293542074363, |
|
"grad_norm": 0.26347002387046814, |
|
"learning_rate": 8.864970645792564e-06, |
|
"loss": 1.0474, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.11545988258317025, |
|
"grad_norm": 0.40815216302871704, |
|
"learning_rate": 8.845401174168298e-06, |
|
"loss": 1.0143, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.11741682974559686, |
|
"grad_norm": 0.3168679475784302, |
|
"learning_rate": 8.825831702544032e-06, |
|
"loss": 0.9874, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11937377690802348, |
|
"grad_norm": 0.3845454454421997, |
|
"learning_rate": 8.806262230919765e-06, |
|
"loss": 0.9675, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.12133072407045009, |
|
"grad_norm": 0.3026748299598694, |
|
"learning_rate": 8.786692759295499e-06, |
|
"loss": 1.0715, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1232876712328767, |
|
"grad_norm": 0.3220444917678833, |
|
"learning_rate": 8.767123287671233e-06, |
|
"loss": 0.9537, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.12524461839530332, |
|
"grad_norm": 0.2780131995677948, |
|
"learning_rate": 8.747553816046968e-06, |
|
"loss": 0.9989, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.12720156555772993, |
|
"grad_norm": 0.2975338101387024, |
|
"learning_rate": 8.7279843444227e-06, |
|
"loss": 1.0493, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.12915851272015655, |
|
"grad_norm": 0.29012811183929443, |
|
"learning_rate": 8.708414872798435e-06, |
|
"loss": 1.0393, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.13111545988258316, |
|
"grad_norm": 0.25619998574256897, |
|
"learning_rate": 8.688845401174169e-06, |
|
"loss": 1.0265, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.13307240704500978, |
|
"grad_norm": 0.27613916993141174, |
|
"learning_rate": 8.669275929549903e-06, |
|
"loss": 1.0921, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1350293542074364, |
|
"grad_norm": 0.28087612986564636, |
|
"learning_rate": 8.649706457925636e-06, |
|
"loss": 1.0375, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.136986301369863, |
|
"grad_norm": 0.2618680000305176, |
|
"learning_rate": 8.63013698630137e-06, |
|
"loss": 1.0382, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13894324853228962, |
|
"grad_norm": 0.38419079780578613, |
|
"learning_rate": 8.610567514677104e-06, |
|
"loss": 1.0343, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.14090019569471623, |
|
"grad_norm": 0.2579880654811859, |
|
"learning_rate": 8.590998043052839e-06, |
|
"loss": 1.034, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 0.2773086726665497, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 1.0823, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.14481409001956946, |
|
"grad_norm": 0.2644873559474945, |
|
"learning_rate": 8.551859099804306e-06, |
|
"loss": 1.0831, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.14677103718199608, |
|
"grad_norm": 0.3036477863788605, |
|
"learning_rate": 8.53228962818004e-06, |
|
"loss": 1.0366, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1487279843444227, |
|
"grad_norm": 0.25494951009750366, |
|
"learning_rate": 8.512720156555774e-06, |
|
"loss": 0.9652, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1506849315068493, |
|
"grad_norm": 0.27949514985084534, |
|
"learning_rate": 8.493150684931507e-06, |
|
"loss": 1.0384, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.15264187866927592, |
|
"grad_norm": 0.2715696096420288, |
|
"learning_rate": 8.473581213307241e-06, |
|
"loss": 1.1061, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.15459882583170254, |
|
"grad_norm": 0.36530429124832153, |
|
"learning_rate": 8.454011741682975e-06, |
|
"loss": 1.103, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.15655577299412915, |
|
"grad_norm": 0.3417298495769501, |
|
"learning_rate": 8.43444227005871e-06, |
|
"loss": 1.0395, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.15851272015655576, |
|
"grad_norm": 0.25749561190605164, |
|
"learning_rate": 8.414872798434442e-06, |
|
"loss": 1.0554, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.16046966731898238, |
|
"grad_norm": 0.30251964926719666, |
|
"learning_rate": 8.395303326810177e-06, |
|
"loss": 1.0466, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.162426614481409, |
|
"grad_norm": 0.27155768871307373, |
|
"learning_rate": 8.37573385518591e-06, |
|
"loss": 1.0019, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.1643835616438356, |
|
"grad_norm": 0.2923905551433563, |
|
"learning_rate": 8.356164383561644e-06, |
|
"loss": 1.0335, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.16634050880626222, |
|
"grad_norm": 0.2730099558830261, |
|
"learning_rate": 8.336594911937378e-06, |
|
"loss": 1.0066, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.16829745596868884, |
|
"grad_norm": 0.27152329683303833, |
|
"learning_rate": 8.317025440313112e-06, |
|
"loss": 1.0408, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.17025440313111545, |
|
"grad_norm": 0.2805017828941345, |
|
"learning_rate": 8.297455968688845e-06, |
|
"loss": 1.0159, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.17221135029354206, |
|
"grad_norm": 0.30287447571754456, |
|
"learning_rate": 8.27788649706458e-06, |
|
"loss": 0.9943, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.17416829745596868, |
|
"grad_norm": 0.4621107280254364, |
|
"learning_rate": 8.258317025440313e-06, |
|
"loss": 0.9984, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.1761252446183953, |
|
"grad_norm": 0.27693963050842285, |
|
"learning_rate": 8.238747553816048e-06, |
|
"loss": 1.0471, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1780821917808219, |
|
"grad_norm": 0.2575695514678955, |
|
"learning_rate": 8.219178082191782e-06, |
|
"loss": 1.0124, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.18003913894324852, |
|
"grad_norm": 0.3268100321292877, |
|
"learning_rate": 8.199608610567515e-06, |
|
"loss": 1.0201, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.18199608610567514, |
|
"grad_norm": 0.2674817144870758, |
|
"learning_rate": 8.180039138943249e-06, |
|
"loss": 1.0491, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.18395303326810175, |
|
"grad_norm": 0.29703083634376526, |
|
"learning_rate": 8.160469667318983e-06, |
|
"loss": 0.9988, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.18590998043052837, |
|
"grad_norm": 0.3002019226551056, |
|
"learning_rate": 8.140900195694716e-06, |
|
"loss": 0.983, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.18786692759295498, |
|
"grad_norm": 0.28777456283569336, |
|
"learning_rate": 8.121330724070452e-06, |
|
"loss": 1.0218, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.1898238747553816, |
|
"grad_norm": 0.27293819189071655, |
|
"learning_rate": 8.101761252446184e-06, |
|
"loss": 1.0338, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.1917808219178082, |
|
"grad_norm": 0.288841187953949, |
|
"learning_rate": 8.082191780821919e-06, |
|
"loss": 1.0355, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.19373776908023482, |
|
"grad_norm": 0.2783367931842804, |
|
"learning_rate": 8.062622309197653e-06, |
|
"loss": 1.0328, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.19569471624266144, |
|
"grad_norm": 0.3079596161842346, |
|
"learning_rate": 8.043052837573386e-06, |
|
"loss": 1.0034, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19765166340508805, |
|
"grad_norm": 0.27803629636764526, |
|
"learning_rate": 8.02348336594912e-06, |
|
"loss": 0.9606, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.19960861056751467, |
|
"grad_norm": 0.2793106138706207, |
|
"learning_rate": 8.003913894324854e-06, |
|
"loss": 0.9918, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.20156555772994128, |
|
"grad_norm": 0.3062870502471924, |
|
"learning_rate": 7.984344422700587e-06, |
|
"loss": 1.027, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.2035225048923679, |
|
"grad_norm": 0.2591916620731354, |
|
"learning_rate": 7.964774951076321e-06, |
|
"loss": 0.9696, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2054794520547945, |
|
"grad_norm": 0.27566251158714294, |
|
"learning_rate": 7.945205479452055e-06, |
|
"loss": 0.9723, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.20743639921722112, |
|
"grad_norm": 0.5589897632598877, |
|
"learning_rate": 7.92563600782779e-06, |
|
"loss": 0.8956, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.20939334637964774, |
|
"grad_norm": 0.3209697902202606, |
|
"learning_rate": 7.906066536203524e-06, |
|
"loss": 1.0195, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.21135029354207435, |
|
"grad_norm": 0.5480762720108032, |
|
"learning_rate": 7.886497064579257e-06, |
|
"loss": 0.8403, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.21330724070450097, |
|
"grad_norm": 0.27812933921813965, |
|
"learning_rate": 7.86692759295499e-06, |
|
"loss": 0.9995, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.21526418786692758, |
|
"grad_norm": 0.3054767847061157, |
|
"learning_rate": 7.847358121330724e-06, |
|
"loss": 0.9886, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2172211350293542, |
|
"grad_norm": 0.26338857412338257, |
|
"learning_rate": 7.827788649706458e-06, |
|
"loss": 1.0099, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.2191780821917808, |
|
"grad_norm": 0.28542402386665344, |
|
"learning_rate": 7.808219178082192e-06, |
|
"loss": 1.0108, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.22113502935420742, |
|
"grad_norm": 0.2645825147628784, |
|
"learning_rate": 7.788649706457925e-06, |
|
"loss": 0.9855, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.22309197651663404, |
|
"grad_norm": 0.36923593282699585, |
|
"learning_rate": 7.76908023483366e-06, |
|
"loss": 0.874, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.22504892367906065, |
|
"grad_norm": 0.2942226827144623, |
|
"learning_rate": 7.749510763209393e-06, |
|
"loss": 1.035, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.22700587084148727, |
|
"grad_norm": 0.25831156969070435, |
|
"learning_rate": 7.729941291585128e-06, |
|
"loss": 0.9821, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.22896281800391388, |
|
"grad_norm": 0.2656974196434021, |
|
"learning_rate": 7.710371819960862e-06, |
|
"loss": 0.9607, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.2309197651663405, |
|
"grad_norm": 0.2775110602378845, |
|
"learning_rate": 7.690802348336595e-06, |
|
"loss": 0.9405, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.2328767123287671, |
|
"grad_norm": 0.2815232276916504, |
|
"learning_rate": 7.671232876712329e-06, |
|
"loss": 0.9963, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.23483365949119372, |
|
"grad_norm": 0.2941558063030243, |
|
"learning_rate": 7.651663405088063e-06, |
|
"loss": 1.0005, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.23679060665362034, |
|
"grad_norm": 0.3432468771934509, |
|
"learning_rate": 7.632093933463796e-06, |
|
"loss": 0.9146, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.23874755381604695, |
|
"grad_norm": 0.2610355615615845, |
|
"learning_rate": 7.612524461839531e-06, |
|
"loss": 1.0172, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.24070450097847357, |
|
"grad_norm": 0.30524012446403503, |
|
"learning_rate": 7.5929549902152645e-06, |
|
"loss": 0.8499, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.24266144814090018, |
|
"grad_norm": 0.32364916801452637, |
|
"learning_rate": 7.573385518590999e-06, |
|
"loss": 0.9872, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2446183953033268, |
|
"grad_norm": 0.3468589186668396, |
|
"learning_rate": 7.553816046966732e-06, |
|
"loss": 0.8668, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2465753424657534, |
|
"grad_norm": 0.28638043999671936, |
|
"learning_rate": 7.534246575342466e-06, |
|
"loss": 0.9535, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.24853228962818003, |
|
"grad_norm": 0.4365461766719818, |
|
"learning_rate": 7.5146771037182e-06, |
|
"loss": 0.9555, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.25048923679060664, |
|
"grad_norm": 0.2678782641887665, |
|
"learning_rate": 7.4951076320939344e-06, |
|
"loss": 0.9609, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.25244618395303325, |
|
"grad_norm": 0.32698872685432434, |
|
"learning_rate": 7.475538160469667e-06, |
|
"loss": 0.9961, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.25440313111545987, |
|
"grad_norm": 0.2704651653766632, |
|
"learning_rate": 7.455968688845402e-06, |
|
"loss": 0.9892, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2563600782778865, |
|
"grad_norm": 0.28522607684135437, |
|
"learning_rate": 7.436399217221135e-06, |
|
"loss": 0.9704, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.2583170254403131, |
|
"grad_norm": 0.3018089532852173, |
|
"learning_rate": 7.41682974559687e-06, |
|
"loss": 0.9922, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.2602739726027397, |
|
"grad_norm": 0.3053472638130188, |
|
"learning_rate": 7.397260273972603e-06, |
|
"loss": 0.9679, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.2622309197651663, |
|
"grad_norm": 0.3184056580066681, |
|
"learning_rate": 7.377690802348337e-06, |
|
"loss": 0.993, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.26418786692759294, |
|
"grad_norm": 0.2696513831615448, |
|
"learning_rate": 7.358121330724071e-06, |
|
"loss": 0.9933, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.26614481409001955, |
|
"grad_norm": 0.2935352921485901, |
|
"learning_rate": 7.338551859099805e-06, |
|
"loss": 0.9124, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.26810176125244617, |
|
"grad_norm": 0.29200154542922974, |
|
"learning_rate": 7.318982387475538e-06, |
|
"loss": 1.0168, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.2700587084148728, |
|
"grad_norm": 0.29628440737724304, |
|
"learning_rate": 7.299412915851273e-06, |
|
"loss": 0.8931, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.2720156555772994, |
|
"grad_norm": 0.2664463520050049, |
|
"learning_rate": 7.279843444227006e-06, |
|
"loss": 0.9743, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.273972602739726, |
|
"grad_norm": 0.3182372748851776, |
|
"learning_rate": 7.260273972602741e-06, |
|
"loss": 0.961, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2759295499021526, |
|
"grad_norm": 0.2961776554584503, |
|
"learning_rate": 7.240704500978474e-06, |
|
"loss": 0.9807, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.27788649706457924, |
|
"grad_norm": 0.2903692126274109, |
|
"learning_rate": 7.221135029354208e-06, |
|
"loss": 0.9685, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.27984344422700586, |
|
"grad_norm": 0.43462836742401123, |
|
"learning_rate": 7.201565557729942e-06, |
|
"loss": 0.9757, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.28180039138943247, |
|
"grad_norm": 0.4679277241230011, |
|
"learning_rate": 7.181996086105676e-06, |
|
"loss": 0.9829, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.2837573385518591, |
|
"grad_norm": 0.3153940737247467, |
|
"learning_rate": 7.162426614481409e-06, |
|
"loss": 1.0502, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 0.2722516357898712, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.9777, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.2876712328767123, |
|
"grad_norm": 0.30617383122444153, |
|
"learning_rate": 7.123287671232877e-06, |
|
"loss": 0.8937, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.2896281800391389, |
|
"grad_norm": 0.28956839442253113, |
|
"learning_rate": 7.103718199608612e-06, |
|
"loss": 0.9796, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.29158512720156554, |
|
"grad_norm": 0.31176698207855225, |
|
"learning_rate": 7.0841487279843445e-06, |
|
"loss": 0.9775, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.29354207436399216, |
|
"grad_norm": 0.3150478005409241, |
|
"learning_rate": 7.064579256360079e-06, |
|
"loss": 0.9624, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.29549902152641877, |
|
"grad_norm": 0.26309195160865784, |
|
"learning_rate": 7.045009784735813e-06, |
|
"loss": 1.0398, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.2974559686888454, |
|
"grad_norm": 0.3138732612133026, |
|
"learning_rate": 7.025440313111546e-06, |
|
"loss": 0.9977, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.299412915851272, |
|
"grad_norm": 0.39994385838508606, |
|
"learning_rate": 7.00587084148728e-06, |
|
"loss": 0.8905, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.3013698630136986, |
|
"grad_norm": 0.3341100811958313, |
|
"learning_rate": 6.9863013698630145e-06, |
|
"loss": 0.946, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.30332681017612523, |
|
"grad_norm": 0.2890676259994507, |
|
"learning_rate": 6.966731898238748e-06, |
|
"loss": 0.9756, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.30528375733855184, |
|
"grad_norm": 0.2878880202770233, |
|
"learning_rate": 6.947162426614482e-06, |
|
"loss": 1.002, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.30724070450097846, |
|
"grad_norm": 0.31986042857170105, |
|
"learning_rate": 6.927592954990215e-06, |
|
"loss": 0.9563, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.30919765166340507, |
|
"grad_norm": 0.3330422639846802, |
|
"learning_rate": 6.90802348336595e-06, |
|
"loss": 0.946, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.3111545988258317, |
|
"grad_norm": 0.3121936321258545, |
|
"learning_rate": 6.8884540117416836e-06, |
|
"loss": 0.9553, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.3131115459882583, |
|
"grad_norm": 0.32173246145248413, |
|
"learning_rate": 6.868884540117417e-06, |
|
"loss": 0.9717, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3150684931506849, |
|
"grad_norm": 0.32296982407569885, |
|
"learning_rate": 6.849315068493151e-06, |
|
"loss": 0.8586, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.31702544031311153, |
|
"grad_norm": 0.319832444190979, |
|
"learning_rate": 6.829745596868885e-06, |
|
"loss": 0.9785, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.31898238747553814, |
|
"grad_norm": 0.3126278817653656, |
|
"learning_rate": 6.810176125244618e-06, |
|
"loss": 0.9448, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.32093933463796476, |
|
"grad_norm": 0.3096999228000641, |
|
"learning_rate": 6.790606653620353e-06, |
|
"loss": 0.971, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.32289628180039137, |
|
"grad_norm": 0.3132016062736511, |
|
"learning_rate": 6.771037181996086e-06, |
|
"loss": 0.9722, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.324853228962818, |
|
"grad_norm": 0.3196086585521698, |
|
"learning_rate": 6.751467710371821e-06, |
|
"loss": 0.9611, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.3268101761252446, |
|
"grad_norm": 0.33392807841300964, |
|
"learning_rate": 6.731898238747554e-06, |
|
"loss": 0.9585, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3287671232876712, |
|
"grad_norm": 0.3167315125465393, |
|
"learning_rate": 6.712328767123288e-06, |
|
"loss": 0.8919, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.33072407045009783, |
|
"grad_norm": 0.3052123188972473, |
|
"learning_rate": 6.692759295499022e-06, |
|
"loss": 0.944, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.33268101761252444, |
|
"grad_norm": 0.32091811299324036, |
|
"learning_rate": 6.673189823874756e-06, |
|
"loss": 0.898, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.33463796477495106, |
|
"grad_norm": 0.3221595287322998, |
|
"learning_rate": 6.653620352250489e-06, |
|
"loss": 0.9206, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.33659491193737767, |
|
"grad_norm": 0.3247275650501251, |
|
"learning_rate": 6.634050880626224e-06, |
|
"loss": 0.9629, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3385518590998043, |
|
"grad_norm": 0.3308790624141693, |
|
"learning_rate": 6.614481409001957e-06, |
|
"loss": 0.9712, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.3405088062622309, |
|
"grad_norm": 0.2884618937969208, |
|
"learning_rate": 6.594911937377692e-06, |
|
"loss": 0.9922, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.3424657534246575, |
|
"grad_norm": 0.2902919054031372, |
|
"learning_rate": 6.5753424657534245e-06, |
|
"loss": 0.9865, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.34442270058708413, |
|
"grad_norm": 0.3081991374492645, |
|
"learning_rate": 6.555772994129159e-06, |
|
"loss": 0.9578, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.34637964774951074, |
|
"grad_norm": 0.30048370361328125, |
|
"learning_rate": 6.536203522504893e-06, |
|
"loss": 1.0135, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.34833659491193736, |
|
"grad_norm": 0.30617308616638184, |
|
"learning_rate": 6.516634050880627e-06, |
|
"loss": 0.932, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.350293542074364, |
|
"grad_norm": 0.32503214478492737, |
|
"learning_rate": 6.49706457925636e-06, |
|
"loss": 0.8688, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.3522504892367906, |
|
"grad_norm": 0.348254531621933, |
|
"learning_rate": 6.4774951076320945e-06, |
|
"loss": 0.9353, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3542074363992172, |
|
"grad_norm": 0.3076007664203644, |
|
"learning_rate": 6.457925636007828e-06, |
|
"loss": 0.9643, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.3561643835616438, |
|
"grad_norm": 0.31836771965026855, |
|
"learning_rate": 6.438356164383563e-06, |
|
"loss": 0.9717, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.35812133072407043, |
|
"grad_norm": 0.3177882134914398, |
|
"learning_rate": 6.4187866927592954e-06, |
|
"loss": 0.9363, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.36007827788649704, |
|
"grad_norm": 0.35349786281585693, |
|
"learning_rate": 6.39921722113503e-06, |
|
"loss": 0.9697, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.36203522504892366, |
|
"grad_norm": 0.3868875801563263, |
|
"learning_rate": 6.379647749510764e-06, |
|
"loss": 0.9895, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3639921722113503, |
|
"grad_norm": 0.3449805676937103, |
|
"learning_rate": 6.360078277886498e-06, |
|
"loss": 0.9625, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.3659491193737769, |
|
"grad_norm": 0.3141196370124817, |
|
"learning_rate": 6.340508806262231e-06, |
|
"loss": 0.8996, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.3679060665362035, |
|
"grad_norm": 0.3363155424594879, |
|
"learning_rate": 6.320939334637965e-06, |
|
"loss": 0.9723, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.3698630136986301, |
|
"grad_norm": 0.27333149313926697, |
|
"learning_rate": 6.301369863013699e-06, |
|
"loss": 0.9592, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.37181996086105673, |
|
"grad_norm": 0.32245489954948425, |
|
"learning_rate": 6.2818003913894335e-06, |
|
"loss": 0.911, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.37377690802348335, |
|
"grad_norm": 0.31895750761032104, |
|
"learning_rate": 6.262230919765166e-06, |
|
"loss": 0.9951, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.37573385518590996, |
|
"grad_norm": 0.373411625623703, |
|
"learning_rate": 6.242661448140901e-06, |
|
"loss": 0.9124, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.3776908023483366, |
|
"grad_norm": 0.30244988203048706, |
|
"learning_rate": 6.2230919765166345e-06, |
|
"loss": 0.9502, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.3796477495107632, |
|
"grad_norm": 0.29507070779800415, |
|
"learning_rate": 6.203522504892369e-06, |
|
"loss": 0.9594, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.3816046966731898, |
|
"grad_norm": 0.31607192754745483, |
|
"learning_rate": 6.183953033268102e-06, |
|
"loss": 0.9379, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3835616438356164, |
|
"grad_norm": 0.3330182135105133, |
|
"learning_rate": 6.164383561643836e-06, |
|
"loss": 0.954, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.38551859099804303, |
|
"grad_norm": 0.33578622341156006, |
|
"learning_rate": 6.14481409001957e-06, |
|
"loss": 0.9416, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.38747553816046965, |
|
"grad_norm": 0.3267570436000824, |
|
"learning_rate": 6.1252446183953044e-06, |
|
"loss": 0.9379, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.38943248532289626, |
|
"grad_norm": 0.33791911602020264, |
|
"learning_rate": 6.105675146771037e-06, |
|
"loss": 0.9247, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.3913894324853229, |
|
"grad_norm": 0.32018688321113586, |
|
"learning_rate": 6.086105675146772e-06, |
|
"loss": 0.8886, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3933463796477495, |
|
"grad_norm": 0.32782450318336487, |
|
"learning_rate": 6.066536203522505e-06, |
|
"loss": 0.8923, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.3953033268101761, |
|
"grad_norm": 0.32713061571121216, |
|
"learning_rate": 6.046966731898239e-06, |
|
"loss": 0.9265, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.3972602739726027, |
|
"grad_norm": 0.3310089409351349, |
|
"learning_rate": 6.027397260273973e-06, |
|
"loss": 0.961, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.39921722113502933, |
|
"grad_norm": 0.35549378395080566, |
|
"learning_rate": 6.007827788649707e-06, |
|
"loss": 0.9342, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.40117416829745595, |
|
"grad_norm": 0.29176008701324463, |
|
"learning_rate": 5.988258317025441e-06, |
|
"loss": 0.9507, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.40313111545988256, |
|
"grad_norm": 0.32877659797668457, |
|
"learning_rate": 5.9686888454011745e-06, |
|
"loss": 0.9162, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4050880626223092, |
|
"grad_norm": 0.33669957518577576, |
|
"learning_rate": 5.949119373776908e-06, |
|
"loss": 0.9424, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.4070450097847358, |
|
"grad_norm": 0.35781094431877136, |
|
"learning_rate": 5.929549902152643e-06, |
|
"loss": 0.9469, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4090019569471624, |
|
"grad_norm": 0.4322330355644226, |
|
"learning_rate": 5.909980430528376e-06, |
|
"loss": 0.8294, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 0.3147006034851074, |
|
"learning_rate": 5.89041095890411e-06, |
|
"loss": 0.8941, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.41291585127201563, |
|
"grad_norm": 0.3192490339279175, |
|
"learning_rate": 5.870841487279844e-06, |
|
"loss": 0.9286, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.41487279843444225, |
|
"grad_norm": 0.287655234336853, |
|
"learning_rate": 5.851272015655578e-06, |
|
"loss": 0.9372, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.41682974559686886, |
|
"grad_norm": 0.3179602324962616, |
|
"learning_rate": 5.831702544031311e-06, |
|
"loss": 0.9261, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.4187866927592955, |
|
"grad_norm": 0.4064527750015259, |
|
"learning_rate": 5.812133072407045e-06, |
|
"loss": 0.9122, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.4207436399217221, |
|
"grad_norm": 0.4332832992076874, |
|
"learning_rate": 5.792563600782779e-06, |
|
"loss": 0.9605, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4227005870841487, |
|
"grad_norm": 0.32594162225723267, |
|
"learning_rate": 5.7729941291585136e-06, |
|
"loss": 0.8909, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.4246575342465753, |
|
"grad_norm": 0.30977311730384827, |
|
"learning_rate": 5.753424657534246e-06, |
|
"loss": 0.9206, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.42661448140900193, |
|
"grad_norm": 0.3289760947227478, |
|
"learning_rate": 5.733855185909981e-06, |
|
"loss": 0.9241, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 0.3369634747505188, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.9863, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.43052837573385516, |
|
"grad_norm": 0.34902551770210266, |
|
"learning_rate": 5.694716242661449e-06, |
|
"loss": 0.9399, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4324853228962818, |
|
"grad_norm": 0.339798241853714, |
|
"learning_rate": 5.675146771037182e-06, |
|
"loss": 0.9525, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.4344422700587084, |
|
"grad_norm": 0.3014651834964752, |
|
"learning_rate": 5.655577299412916e-06, |
|
"loss": 0.9911, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.436399217221135, |
|
"grad_norm": 0.28443804383277893, |
|
"learning_rate": 5.63600782778865e-06, |
|
"loss": 0.9884, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.4383561643835616, |
|
"grad_norm": 0.5278264284133911, |
|
"learning_rate": 5.6164383561643845e-06, |
|
"loss": 0.924, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.44031311154598823, |
|
"grad_norm": 0.313249409198761, |
|
"learning_rate": 5.596868884540117e-06, |
|
"loss": 0.8896, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.44227005870841485, |
|
"grad_norm": 0.3332677483558655, |
|
"learning_rate": 5.577299412915852e-06, |
|
"loss": 0.8871, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.44422700587084146, |
|
"grad_norm": 0.3289450705051422, |
|
"learning_rate": 5.557729941291585e-06, |
|
"loss": 0.9482, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.4461839530332681, |
|
"grad_norm": 0.340781569480896, |
|
"learning_rate": 5.53816046966732e-06, |
|
"loss": 0.9035, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.4481409001956947, |
|
"grad_norm": 0.34197819232940674, |
|
"learning_rate": 5.518590998043053e-06, |
|
"loss": 0.9275, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.4500978473581213, |
|
"grad_norm": 0.4397524893283844, |
|
"learning_rate": 5.499021526418787e-06, |
|
"loss": 0.9485, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4520547945205479, |
|
"grad_norm": 0.3033043444156647, |
|
"learning_rate": 5.479452054794521e-06, |
|
"loss": 0.867, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.45401174168297453, |
|
"grad_norm": 0.3285888135433197, |
|
"learning_rate": 5.459882583170255e-06, |
|
"loss": 0.9199, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.45596868884540115, |
|
"grad_norm": 0.33250048756599426, |
|
"learning_rate": 5.440313111545988e-06, |
|
"loss": 0.8926, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.45792563600782776, |
|
"grad_norm": 0.3682827651500702, |
|
"learning_rate": 5.420743639921723e-06, |
|
"loss": 0.9012, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.4598825831702544, |
|
"grad_norm": 0.31080353260040283, |
|
"learning_rate": 5.401174168297456e-06, |
|
"loss": 0.8953, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.461839530332681, |
|
"grad_norm": 0.3215543329715729, |
|
"learning_rate": 5.381604696673191e-06, |
|
"loss": 0.7648, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.4637964774951076, |
|
"grad_norm": 0.33108121156692505, |
|
"learning_rate": 5.362035225048924e-06, |
|
"loss": 0.9314, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.4657534246575342, |
|
"grad_norm": 0.3492167294025421, |
|
"learning_rate": 5.342465753424658e-06, |
|
"loss": 0.9463, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.46771037181996084, |
|
"grad_norm": 0.3727250099182129, |
|
"learning_rate": 5.322896281800392e-06, |
|
"loss": 0.9519, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.46966731898238745, |
|
"grad_norm": 0.3256610929965973, |
|
"learning_rate": 5.303326810176126e-06, |
|
"loss": 0.919, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.47162426614481406, |
|
"grad_norm": 0.30512261390686035, |
|
"learning_rate": 5.283757338551859e-06, |
|
"loss": 0.9372, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.4735812133072407, |
|
"grad_norm": 0.3406316041946411, |
|
"learning_rate": 5.2641878669275936e-06, |
|
"loss": 0.9323, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.4755381604696673, |
|
"grad_norm": 0.3489183485507965, |
|
"learning_rate": 5.244618395303327e-06, |
|
"loss": 0.9259, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.4774951076320939, |
|
"grad_norm": 0.349557489156723, |
|
"learning_rate": 5.225048923679062e-06, |
|
"loss": 0.8651, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.4794520547945205, |
|
"grad_norm": 0.3324158191680908, |
|
"learning_rate": 5.2054794520547945e-06, |
|
"loss": 0.8787, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.48140900195694714, |
|
"grad_norm": 0.3594268560409546, |
|
"learning_rate": 5.185909980430529e-06, |
|
"loss": 0.8767, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.48336594911937375, |
|
"grad_norm": 0.33352982997894287, |
|
"learning_rate": 5.166340508806263e-06, |
|
"loss": 0.8714, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.48532289628180036, |
|
"grad_norm": 0.3096468150615692, |
|
"learning_rate": 5.146771037181997e-06, |
|
"loss": 0.9296, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.487279843444227, |
|
"grad_norm": 0.3263510763645172, |
|
"learning_rate": 5.12720156555773e-06, |
|
"loss": 0.9597, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.4892367906066536, |
|
"grad_norm": 0.3318216800689697, |
|
"learning_rate": 5.1076320939334645e-06, |
|
"loss": 0.9144, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4911937377690802, |
|
"grad_norm": 0.39225342869758606, |
|
"learning_rate": 5.088062622309198e-06, |
|
"loss": 0.934, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.4931506849315068, |
|
"grad_norm": 0.3386378884315491, |
|
"learning_rate": 5.068493150684932e-06, |
|
"loss": 0.9443, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.49510763209393344, |
|
"grad_norm": 0.31350958347320557, |
|
"learning_rate": 5.0489236790606654e-06, |
|
"loss": 0.9419, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.49706457925636005, |
|
"grad_norm": 0.3767964839935303, |
|
"learning_rate": 5.0293542074364e-06, |
|
"loss": 0.9299, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.49902152641878667, |
|
"grad_norm": 0.3285723924636841, |
|
"learning_rate": 5.009784735812134e-06, |
|
"loss": 0.9561, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5009784735812133, |
|
"grad_norm": 0.38723042607307434, |
|
"learning_rate": 4.990215264187867e-06, |
|
"loss": 0.9415, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.50293542074364, |
|
"grad_norm": 0.31375616788864136, |
|
"learning_rate": 4.970645792563601e-06, |
|
"loss": 0.9507, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5048923679060665, |
|
"grad_norm": 0.33384719491004944, |
|
"learning_rate": 4.9510763209393345e-06, |
|
"loss": 0.8887, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5068493150684932, |
|
"grad_norm": 0.39128080010414124, |
|
"learning_rate": 4.931506849315069e-06, |
|
"loss": 0.9316, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5088062622309197, |
|
"grad_norm": 0.3334865868091583, |
|
"learning_rate": 4.911937377690803e-06, |
|
"loss": 0.8958, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5107632093933464, |
|
"grad_norm": 0.3332456350326538, |
|
"learning_rate": 4.892367906066536e-06, |
|
"loss": 0.8705, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.512720156555773, |
|
"grad_norm": 0.42276686429977417, |
|
"learning_rate": 4.87279843444227e-06, |
|
"loss": 0.824, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5146771037181996, |
|
"grad_norm": 0.33200517296791077, |
|
"learning_rate": 4.853228962818004e-06, |
|
"loss": 0.905, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5166340508806262, |
|
"grad_norm": 0.3116356134414673, |
|
"learning_rate": 4.833659491193738e-06, |
|
"loss": 0.9098, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.5185909980430529, |
|
"grad_norm": 0.33332517743110657, |
|
"learning_rate": 4.814090019569472e-06, |
|
"loss": 0.9436, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5205479452054794, |
|
"grad_norm": 0.3184143900871277, |
|
"learning_rate": 4.7945205479452054e-06, |
|
"loss": 0.9149, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.5225048923679061, |
|
"grad_norm": 0.3486206829547882, |
|
"learning_rate": 4.774951076320939e-06, |
|
"loss": 0.8951, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.5244618395303327, |
|
"grad_norm": 0.3263947367668152, |
|
"learning_rate": 4.755381604696674e-06, |
|
"loss": 0.9133, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.5264187866927593, |
|
"grad_norm": 0.33816662430763245, |
|
"learning_rate": 4.735812133072407e-06, |
|
"loss": 0.9537, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.5283757338551859, |
|
"grad_norm": 0.4058966338634491, |
|
"learning_rate": 4.716242661448141e-06, |
|
"loss": 0.7974, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5303326810176126, |
|
"grad_norm": 0.33853861689567566, |
|
"learning_rate": 4.6966731898238745e-06, |
|
"loss": 0.9175, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.5322896281800391, |
|
"grad_norm": 0.3483884036540985, |
|
"learning_rate": 4.677103718199609e-06, |
|
"loss": 0.8432, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.5342465753424658, |
|
"grad_norm": 0.33916252851486206, |
|
"learning_rate": 4.657534246575343e-06, |
|
"loss": 0.9084, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.5362035225048923, |
|
"grad_norm": 0.3245210349559784, |
|
"learning_rate": 4.637964774951076e-06, |
|
"loss": 0.9392, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.538160469667319, |
|
"grad_norm": 0.382941871881485, |
|
"learning_rate": 4.61839530332681e-06, |
|
"loss": 0.9531, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5401174168297456, |
|
"grad_norm": 0.31128600239753723, |
|
"learning_rate": 4.5988258317025445e-06, |
|
"loss": 0.9656, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.5420743639921722, |
|
"grad_norm": 0.36267444491386414, |
|
"learning_rate": 4.579256360078278e-06, |
|
"loss": 0.9052, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.5440313111545988, |
|
"grad_norm": 0.32378819584846497, |
|
"learning_rate": 4.559686888454012e-06, |
|
"loss": 0.9546, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.5459882583170255, |
|
"grad_norm": 0.47103360295295715, |
|
"learning_rate": 4.5401174168297455e-06, |
|
"loss": 0.8386, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.547945205479452, |
|
"grad_norm": 0.34283939003944397, |
|
"learning_rate": 4.52054794520548e-06, |
|
"loss": 0.8913, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5499021526418787, |
|
"grad_norm": 0.33877629041671753, |
|
"learning_rate": 4.500978473581214e-06, |
|
"loss": 0.8588, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.5518590998043053, |
|
"grad_norm": 0.32226869463920593, |
|
"learning_rate": 4.481409001956947e-06, |
|
"loss": 0.9419, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.5538160469667319, |
|
"grad_norm": 0.3250659704208374, |
|
"learning_rate": 4.461839530332681e-06, |
|
"loss": 0.949, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.5557729941291585, |
|
"grad_norm": 0.37836357951164246, |
|
"learning_rate": 4.442270058708415e-06, |
|
"loss": 0.9173, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.5577299412915852, |
|
"grad_norm": 0.3452129364013672, |
|
"learning_rate": 4.422700587084149e-06, |
|
"loss": 0.8923, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5596868884540117, |
|
"grad_norm": 0.3265805244445801, |
|
"learning_rate": 4.403131115459883e-06, |
|
"loss": 0.9209, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.5616438356164384, |
|
"grad_norm": 0.32123324275016785, |
|
"learning_rate": 4.383561643835616e-06, |
|
"loss": 0.984, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.5636007827788649, |
|
"grad_norm": 0.34007397294044495, |
|
"learning_rate": 4.36399217221135e-06, |
|
"loss": 0.9041, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.5655577299412916, |
|
"grad_norm": 0.34763190150260925, |
|
"learning_rate": 4.3444227005870845e-06, |
|
"loss": 0.9266, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.5675146771037182, |
|
"grad_norm": 0.32859864830970764, |
|
"learning_rate": 4.324853228962818e-06, |
|
"loss": 0.9358, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5694716242661448, |
|
"grad_norm": 0.3776375353336334, |
|
"learning_rate": 4.305283757338552e-06, |
|
"loss": 0.9017, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.38739728927612305, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.9524, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.5733855185909981, |
|
"grad_norm": 0.3514958322048187, |
|
"learning_rate": 4.26614481409002e-06, |
|
"loss": 0.9318, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.5753424657534246, |
|
"grad_norm": 0.340966135263443, |
|
"learning_rate": 4.246575342465754e-06, |
|
"loss": 0.921, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.5772994129158513, |
|
"grad_norm": 0.33006027340888977, |
|
"learning_rate": 4.227005870841487e-06, |
|
"loss": 0.9094, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5792563600782779, |
|
"grad_norm": 0.30589374899864197, |
|
"learning_rate": 4.207436399217221e-06, |
|
"loss": 0.9176, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.5812133072407045, |
|
"grad_norm": 0.34072640538215637, |
|
"learning_rate": 4.187866927592955e-06, |
|
"loss": 0.968, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.5831702544031311, |
|
"grad_norm": 0.34003034234046936, |
|
"learning_rate": 4.168297455968689e-06, |
|
"loss": 0.8709, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.5851272015655578, |
|
"grad_norm": 0.3410165011882782, |
|
"learning_rate": 4.148727984344423e-06, |
|
"loss": 0.9126, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.5870841487279843, |
|
"grad_norm": 0.3337312936782837, |
|
"learning_rate": 4.129158512720156e-06, |
|
"loss": 0.852, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.589041095890411, |
|
"grad_norm": 0.48609423637390137, |
|
"learning_rate": 4.109589041095891e-06, |
|
"loss": 0.8855, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.5909980430528375, |
|
"grad_norm": 0.35817044973373413, |
|
"learning_rate": 4.0900195694716245e-06, |
|
"loss": 0.954, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.5929549902152642, |
|
"grad_norm": 0.37432897090911865, |
|
"learning_rate": 4.070450097847358e-06, |
|
"loss": 0.8936, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.5949119373776908, |
|
"grad_norm": 0.3256794512271881, |
|
"learning_rate": 4.050880626223092e-06, |
|
"loss": 0.9175, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.5968688845401174, |
|
"grad_norm": 0.3711596429347992, |
|
"learning_rate": 4.031311154598826e-06, |
|
"loss": 0.9312, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.598825831702544, |
|
"grad_norm": 0.35513797402381897, |
|
"learning_rate": 4.01174168297456e-06, |
|
"loss": 0.9227, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6007827788649707, |
|
"grad_norm": 0.3059983551502228, |
|
"learning_rate": 3.992172211350294e-06, |
|
"loss": 0.9364, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6027397260273972, |
|
"grad_norm": 0.38014575839042664, |
|
"learning_rate": 3.972602739726027e-06, |
|
"loss": 0.8645, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6046966731898239, |
|
"grad_norm": 0.33558711409568787, |
|
"learning_rate": 3.953033268101762e-06, |
|
"loss": 0.9175, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.6066536203522505, |
|
"grad_norm": 0.3638705015182495, |
|
"learning_rate": 3.933463796477495e-06, |
|
"loss": 1.02, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6086105675146771, |
|
"grad_norm": 0.341256707906723, |
|
"learning_rate": 3.913894324853229e-06, |
|
"loss": 0.9254, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.6105675146771037, |
|
"grad_norm": 0.34499531984329224, |
|
"learning_rate": 3.894324853228963e-06, |
|
"loss": 0.9273, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.6125244618395304, |
|
"grad_norm": 0.3527175784111023, |
|
"learning_rate": 3.874755381604696e-06, |
|
"loss": 0.9152, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.6144814090019569, |
|
"grad_norm": 0.3410734534263611, |
|
"learning_rate": 3.855185909980431e-06, |
|
"loss": 0.9186, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.6164383561643836, |
|
"grad_norm": 0.36121881008148193, |
|
"learning_rate": 3.8356164383561645e-06, |
|
"loss": 0.9857, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6183953033268101, |
|
"grad_norm": 0.6107659935951233, |
|
"learning_rate": 3.816046966731898e-06, |
|
"loss": 0.8668, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.6203522504892368, |
|
"grad_norm": 0.3535270094871521, |
|
"learning_rate": 3.7964774951076322e-06, |
|
"loss": 0.8487, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.6223091976516634, |
|
"grad_norm": 0.3669748604297638, |
|
"learning_rate": 3.776908023483366e-06, |
|
"loss": 0.9351, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.62426614481409, |
|
"grad_norm": 0.33674487471580505, |
|
"learning_rate": 3.7573385518591e-06, |
|
"loss": 0.8533, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.6262230919765166, |
|
"grad_norm": 0.3490351736545563, |
|
"learning_rate": 3.7377690802348336e-06, |
|
"loss": 0.8261, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6281800391389433, |
|
"grad_norm": 0.34486088156700134, |
|
"learning_rate": 3.7181996086105677e-06, |
|
"loss": 0.9306, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.6301369863013698, |
|
"grad_norm": 0.35340040922164917, |
|
"learning_rate": 3.6986301369863014e-06, |
|
"loss": 0.8714, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.6320939334637965, |
|
"grad_norm": 0.3286992609500885, |
|
"learning_rate": 3.6790606653620354e-06, |
|
"loss": 0.9321, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.6340508806262231, |
|
"grad_norm": 0.3706447184085846, |
|
"learning_rate": 3.659491193737769e-06, |
|
"loss": 0.9171, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.6360078277886497, |
|
"grad_norm": 0.3709685802459717, |
|
"learning_rate": 3.639921722113503e-06, |
|
"loss": 0.9456, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6379647749510763, |
|
"grad_norm": 0.33583569526672363, |
|
"learning_rate": 3.620352250489237e-06, |
|
"loss": 0.9105, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.639921722113503, |
|
"grad_norm": 0.36042001843452454, |
|
"learning_rate": 3.600782778864971e-06, |
|
"loss": 0.9062, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.6418786692759295, |
|
"grad_norm": 0.3614070415496826, |
|
"learning_rate": 3.5812133072407045e-06, |
|
"loss": 0.9101, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.6438356164383562, |
|
"grad_norm": 0.3542083501815796, |
|
"learning_rate": 3.5616438356164386e-06, |
|
"loss": 0.935, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.6457925636007827, |
|
"grad_norm": 0.3305101692676544, |
|
"learning_rate": 3.5420743639921723e-06, |
|
"loss": 0.85, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6477495107632094, |
|
"grad_norm": 0.35562682151794434, |
|
"learning_rate": 3.5225048923679063e-06, |
|
"loss": 0.9223, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.649706457925636, |
|
"grad_norm": 0.3588898777961731, |
|
"learning_rate": 3.50293542074364e-06, |
|
"loss": 0.8486, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.6516634050880626, |
|
"grad_norm": 0.48412322998046875, |
|
"learning_rate": 3.483365949119374e-06, |
|
"loss": 0.9895, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.6536203522504892, |
|
"grad_norm": 0.3221297264099121, |
|
"learning_rate": 3.4637964774951077e-06, |
|
"loss": 0.9376, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.6555772994129159, |
|
"grad_norm": 0.3534998595714569, |
|
"learning_rate": 3.4442270058708418e-06, |
|
"loss": 0.8962, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6575342465753424, |
|
"grad_norm": 0.3436375558376312, |
|
"learning_rate": 3.4246575342465754e-06, |
|
"loss": 0.9241, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.6594911937377691, |
|
"grad_norm": 0.37481996417045593, |
|
"learning_rate": 3.405088062622309e-06, |
|
"loss": 0.85, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.6614481409001957, |
|
"grad_norm": 0.4136059284210205, |
|
"learning_rate": 3.385518590998043e-06, |
|
"loss": 0.8787, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.6634050880626223, |
|
"grad_norm": 0.3450472354888916, |
|
"learning_rate": 3.365949119373777e-06, |
|
"loss": 0.9156, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.6653620352250489, |
|
"grad_norm": 0.3252355754375458, |
|
"learning_rate": 3.346379647749511e-06, |
|
"loss": 0.9292, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6673189823874756, |
|
"grad_norm": 0.36309337615966797, |
|
"learning_rate": 3.3268101761252445e-06, |
|
"loss": 0.8951, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.6692759295499021, |
|
"grad_norm": 0.3402676284313202, |
|
"learning_rate": 3.3072407045009786e-06, |
|
"loss": 0.9409, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.6712328767123288, |
|
"grad_norm": 0.3547208607196808, |
|
"learning_rate": 3.2876712328767123e-06, |
|
"loss": 0.8855, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.6731898238747553, |
|
"grad_norm": 0.348457008600235, |
|
"learning_rate": 3.2681017612524463e-06, |
|
"loss": 0.8965, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.675146771037182, |
|
"grad_norm": 0.3382589519023895, |
|
"learning_rate": 3.24853228962818e-06, |
|
"loss": 0.9357, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6771037181996086, |
|
"grad_norm": 0.35978591442108154, |
|
"learning_rate": 3.228962818003914e-06, |
|
"loss": 0.8625, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.6790606653620352, |
|
"grad_norm": 0.3476986885070801, |
|
"learning_rate": 3.2093933463796477e-06, |
|
"loss": 0.8352, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.6810176125244618, |
|
"grad_norm": 0.36571869254112244, |
|
"learning_rate": 3.189823874755382e-06, |
|
"loss": 0.913, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.6829745596868885, |
|
"grad_norm": 0.33142149448394775, |
|
"learning_rate": 3.1702544031311154e-06, |
|
"loss": 0.8793, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.684931506849315, |
|
"grad_norm": 0.37687569856643677, |
|
"learning_rate": 3.1506849315068495e-06, |
|
"loss": 0.8976, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6868884540117417, |
|
"grad_norm": 0.3633004128932953, |
|
"learning_rate": 3.131115459882583e-06, |
|
"loss": 0.8668, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.6888454011741683, |
|
"grad_norm": 0.363525390625, |
|
"learning_rate": 3.1115459882583172e-06, |
|
"loss": 0.8561, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.6908023483365949, |
|
"grad_norm": 0.3553753197193146, |
|
"learning_rate": 3.091976516634051e-06, |
|
"loss": 0.8881, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.6927592954990215, |
|
"grad_norm": 0.36212918162345886, |
|
"learning_rate": 3.072407045009785e-06, |
|
"loss": 0.9327, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.6947162426614482, |
|
"grad_norm": 0.3260986804962158, |
|
"learning_rate": 3.0528375733855186e-06, |
|
"loss": 0.8812, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.6966731898238747, |
|
"grad_norm": 0.39815372228622437, |
|
"learning_rate": 3.0332681017612527e-06, |
|
"loss": 0.8762, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.6986301369863014, |
|
"grad_norm": 0.34042733907699585, |
|
"learning_rate": 3.0136986301369864e-06, |
|
"loss": 0.9195, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.700587084148728, |
|
"grad_norm": 0.39932090044021606, |
|
"learning_rate": 2.9941291585127204e-06, |
|
"loss": 0.8907, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.7025440313111546, |
|
"grad_norm": 0.35055866837501526, |
|
"learning_rate": 2.974559686888454e-06, |
|
"loss": 0.8956, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.7045009784735812, |
|
"grad_norm": 0.43682193756103516, |
|
"learning_rate": 2.954990215264188e-06, |
|
"loss": 0.9004, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7064579256360078, |
|
"grad_norm": 0.3480110466480255, |
|
"learning_rate": 2.935420743639922e-06, |
|
"loss": 0.8725, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.7084148727984344, |
|
"grad_norm": 0.3565778136253357, |
|
"learning_rate": 2.9158512720156555e-06, |
|
"loss": 0.9085, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.7103718199608611, |
|
"grad_norm": 0.38167497515678406, |
|
"learning_rate": 2.8962818003913895e-06, |
|
"loss": 0.9225, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.7123287671232876, |
|
"grad_norm": 0.3642929494380951, |
|
"learning_rate": 2.876712328767123e-06, |
|
"loss": 0.8497, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 0.3579420745372772, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.9122, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7162426614481409, |
|
"grad_norm": 0.4468456506729126, |
|
"learning_rate": 2.837573385518591e-06, |
|
"loss": 0.9301, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.7181996086105675, |
|
"grad_norm": 0.4842437207698822, |
|
"learning_rate": 2.818003913894325e-06, |
|
"loss": 0.8161, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.7201565557729941, |
|
"grad_norm": 0.37980690598487854, |
|
"learning_rate": 2.7984344422700586e-06, |
|
"loss": 0.8068, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.7221135029354208, |
|
"grad_norm": 0.35172978043556213, |
|
"learning_rate": 2.7788649706457927e-06, |
|
"loss": 0.9114, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.7240704500978473, |
|
"grad_norm": 0.34394723176956177, |
|
"learning_rate": 2.7592954990215264e-06, |
|
"loss": 0.9262, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.726027397260274, |
|
"grad_norm": 0.45529139041900635, |
|
"learning_rate": 2.7397260273972604e-06, |
|
"loss": 0.8255, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.7279843444227005, |
|
"grad_norm": 0.3215661644935608, |
|
"learning_rate": 2.720156555772994e-06, |
|
"loss": 0.9848, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.7299412915851272, |
|
"grad_norm": 0.374117374420166, |
|
"learning_rate": 2.700587084148728e-06, |
|
"loss": 0.9345, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.7318982387475538, |
|
"grad_norm": 0.3748462498188019, |
|
"learning_rate": 2.681017612524462e-06, |
|
"loss": 0.9029, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.7338551859099804, |
|
"grad_norm": 0.35281816124916077, |
|
"learning_rate": 2.661448140900196e-06, |
|
"loss": 0.9081, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.735812133072407, |
|
"grad_norm": 0.3568076491355896, |
|
"learning_rate": 2.6418786692759295e-06, |
|
"loss": 0.9359, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.7377690802348337, |
|
"grad_norm": 0.3849165141582489, |
|
"learning_rate": 2.6223091976516636e-06, |
|
"loss": 0.8738, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.7397260273972602, |
|
"grad_norm": 0.33613815903663635, |
|
"learning_rate": 2.6027397260273973e-06, |
|
"loss": 0.8893, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.7416829745596869, |
|
"grad_norm": 0.3870159387588501, |
|
"learning_rate": 2.5831702544031313e-06, |
|
"loss": 0.8985, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.7436399217221135, |
|
"grad_norm": 0.41747015714645386, |
|
"learning_rate": 2.563600782778865e-06, |
|
"loss": 0.9111, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7455968688845401, |
|
"grad_norm": 0.33905646204948425, |
|
"learning_rate": 2.544031311154599e-06, |
|
"loss": 0.9438, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.7475538160469667, |
|
"grad_norm": 0.42772483825683594, |
|
"learning_rate": 2.5244618395303327e-06, |
|
"loss": 0.9123, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.7495107632093934, |
|
"grad_norm": 0.3450902998447418, |
|
"learning_rate": 2.504892367906067e-06, |
|
"loss": 0.9051, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.7514677103718199, |
|
"grad_norm": 0.3520686626434326, |
|
"learning_rate": 2.4853228962818004e-06, |
|
"loss": 0.9282, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.7534246575342466, |
|
"grad_norm": 0.36060193181037903, |
|
"learning_rate": 2.4657534246575345e-06, |
|
"loss": 1.0014, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7553816046966731, |
|
"grad_norm": 0.36178913712501526, |
|
"learning_rate": 2.446183953033268e-06, |
|
"loss": 0.782, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.7573385518590998, |
|
"grad_norm": 0.3532876670360565, |
|
"learning_rate": 2.426614481409002e-06, |
|
"loss": 0.9207, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.7592954990215264, |
|
"grad_norm": 0.34173986315727234, |
|
"learning_rate": 2.407045009784736e-06, |
|
"loss": 0.8727, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.761252446183953, |
|
"grad_norm": 0.34336763620376587, |
|
"learning_rate": 2.3874755381604695e-06, |
|
"loss": 0.8857, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.7632093933463796, |
|
"grad_norm": 0.33171361684799194, |
|
"learning_rate": 2.3679060665362036e-06, |
|
"loss": 0.8882, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7651663405088063, |
|
"grad_norm": 0.3519209325313568, |
|
"learning_rate": 2.3483365949119373e-06, |
|
"loss": 0.8927, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.7671232876712328, |
|
"grad_norm": 0.3307989239692688, |
|
"learning_rate": 2.3287671232876713e-06, |
|
"loss": 0.904, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.7690802348336595, |
|
"grad_norm": 0.3287998139858246, |
|
"learning_rate": 2.309197651663405e-06, |
|
"loss": 0.9244, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.7710371819960861, |
|
"grad_norm": 0.3633367121219635, |
|
"learning_rate": 2.289628180039139e-06, |
|
"loss": 0.8516, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.7729941291585127, |
|
"grad_norm": 0.36312800645828247, |
|
"learning_rate": 2.2700587084148727e-06, |
|
"loss": 0.9231, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7749510763209393, |
|
"grad_norm": 0.3343620002269745, |
|
"learning_rate": 2.250489236790607e-06, |
|
"loss": 0.8798, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.776908023483366, |
|
"grad_norm": 0.41196396946907043, |
|
"learning_rate": 2.2309197651663405e-06, |
|
"loss": 0.8786, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.7788649706457925, |
|
"grad_norm": 0.36387088894844055, |
|
"learning_rate": 2.2113502935420745e-06, |
|
"loss": 0.8739, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.7808219178082192, |
|
"grad_norm": 0.31284716725349426, |
|
"learning_rate": 2.191780821917808e-06, |
|
"loss": 0.9609, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.7827788649706457, |
|
"grad_norm": 0.3968718647956848, |
|
"learning_rate": 2.1722113502935423e-06, |
|
"loss": 0.8576, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7847358121330724, |
|
"grad_norm": 0.346426784992218, |
|
"learning_rate": 2.152641878669276e-06, |
|
"loss": 0.8406, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.786692759295499, |
|
"grad_norm": 0.4300689399242401, |
|
"learning_rate": 2.13307240704501e-06, |
|
"loss": 0.9047, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.7886497064579256, |
|
"grad_norm": 0.32908865809440613, |
|
"learning_rate": 2.1135029354207436e-06, |
|
"loss": 0.9306, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.7906066536203522, |
|
"grad_norm": 0.3870595693588257, |
|
"learning_rate": 2.0939334637964777e-06, |
|
"loss": 0.8387, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.7925636007827789, |
|
"grad_norm": 0.32453787326812744, |
|
"learning_rate": 2.0743639921722114e-06, |
|
"loss": 0.9424, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.7945205479452054, |
|
"grad_norm": 0.2953280806541443, |
|
"learning_rate": 2.0547945205479454e-06, |
|
"loss": 0.955, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.7964774951076321, |
|
"grad_norm": 0.378826767206192, |
|
"learning_rate": 2.035225048923679e-06, |
|
"loss": 0.8313, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.7984344422700587, |
|
"grad_norm": 0.36773788928985596, |
|
"learning_rate": 2.015655577299413e-06, |
|
"loss": 0.8854, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.8003913894324853, |
|
"grad_norm": 0.3617993891239166, |
|
"learning_rate": 1.996086105675147e-06, |
|
"loss": 0.9318, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.8023483365949119, |
|
"grad_norm": 0.3715813159942627, |
|
"learning_rate": 1.976516634050881e-06, |
|
"loss": 0.8805, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8043052837573386, |
|
"grad_norm": 0.3366706073284149, |
|
"learning_rate": 1.9569471624266145e-06, |
|
"loss": 0.9197, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.8062622309197651, |
|
"grad_norm": 0.37290623784065247, |
|
"learning_rate": 1.937377690802348e-06, |
|
"loss": 0.8869, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.8082191780821918, |
|
"grad_norm": 0.34826987981796265, |
|
"learning_rate": 1.9178082191780823e-06, |
|
"loss": 0.8826, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.8101761252446184, |
|
"grad_norm": 0.35748153924942017, |
|
"learning_rate": 1.8982387475538161e-06, |
|
"loss": 0.9207, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.812133072407045, |
|
"grad_norm": 0.3526861071586609, |
|
"learning_rate": 1.87866927592955e-06, |
|
"loss": 0.9348, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8140900195694716, |
|
"grad_norm": 0.3595939874649048, |
|
"learning_rate": 1.8590998043052839e-06, |
|
"loss": 0.9544, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.8160469667318982, |
|
"grad_norm": 0.3745361864566803, |
|
"learning_rate": 1.8395303326810177e-06, |
|
"loss": 0.857, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.8180039138943248, |
|
"grad_norm": 0.3955901563167572, |
|
"learning_rate": 1.8199608610567516e-06, |
|
"loss": 0.8932, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.8199608610567515, |
|
"grad_norm": 0.3213536739349365, |
|
"learning_rate": 1.8003913894324854e-06, |
|
"loss": 0.9079, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 0.36574825644493103, |
|
"learning_rate": 1.7808219178082193e-06, |
|
"loss": 0.9256, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8238747553816047, |
|
"grad_norm": 0.5008761286735535, |
|
"learning_rate": 1.7612524461839532e-06, |
|
"loss": 0.8376, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.8258317025440313, |
|
"grad_norm": 0.312209814786911, |
|
"learning_rate": 1.741682974559687e-06, |
|
"loss": 0.9188, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.8277886497064579, |
|
"grad_norm": 0.4078651964664459, |
|
"learning_rate": 1.7221135029354209e-06, |
|
"loss": 0.9135, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.8297455968688845, |
|
"grad_norm": 0.42918604612350464, |
|
"learning_rate": 1.7025440313111545e-06, |
|
"loss": 0.925, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.8317025440313112, |
|
"grad_norm": 0.3664219081401825, |
|
"learning_rate": 1.6829745596868884e-06, |
|
"loss": 0.9153, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.8336594911937377, |
|
"grad_norm": 0.41947075724601746, |
|
"learning_rate": 1.6634050880626223e-06, |
|
"loss": 0.8403, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.8356164383561644, |
|
"grad_norm": 0.5199389457702637, |
|
"learning_rate": 1.6438356164383561e-06, |
|
"loss": 0.9094, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.837573385518591, |
|
"grad_norm": 0.36971110105514526, |
|
"learning_rate": 1.62426614481409e-06, |
|
"loss": 0.8776, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.8395303326810176, |
|
"grad_norm": 0.3708122968673706, |
|
"learning_rate": 1.6046966731898239e-06, |
|
"loss": 0.8424, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.8414872798434442, |
|
"grad_norm": 0.35816383361816406, |
|
"learning_rate": 1.5851272015655577e-06, |
|
"loss": 0.8263, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8434442270058709, |
|
"grad_norm": 0.4561832845211029, |
|
"learning_rate": 1.5655577299412916e-06, |
|
"loss": 0.9035, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.8454011741682974, |
|
"grad_norm": 0.46993499994277954, |
|
"learning_rate": 1.5459882583170254e-06, |
|
"loss": 0.8494, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.8473581213307241, |
|
"grad_norm": 0.3416410982608795, |
|
"learning_rate": 1.5264187866927593e-06, |
|
"loss": 0.9109, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.8493150684931506, |
|
"grad_norm": 0.36532074213027954, |
|
"learning_rate": 1.5068493150684932e-06, |
|
"loss": 0.8966, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.8512720156555773, |
|
"grad_norm": 0.3833313286304474, |
|
"learning_rate": 1.487279843444227e-06, |
|
"loss": 0.9056, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8532289628180039, |
|
"grad_norm": 0.39663517475128174, |
|
"learning_rate": 1.467710371819961e-06, |
|
"loss": 0.8904, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.8551859099804305, |
|
"grad_norm": 0.3750026524066925, |
|
"learning_rate": 1.4481409001956948e-06, |
|
"loss": 0.895, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.3878662586212158, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.8879, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.8590998043052838, |
|
"grad_norm": 0.32945066690444946, |
|
"learning_rate": 1.4090019569471625e-06, |
|
"loss": 0.9154, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.8610567514677103, |
|
"grad_norm": 0.3289746046066284, |
|
"learning_rate": 1.3894324853228964e-06, |
|
"loss": 0.9272, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.863013698630137, |
|
"grad_norm": 0.3634059727191925, |
|
"learning_rate": 1.3698630136986302e-06, |
|
"loss": 0.9226, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.8649706457925636, |
|
"grad_norm": 0.4308583438396454, |
|
"learning_rate": 1.350293542074364e-06, |
|
"loss": 0.9235, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.8669275929549902, |
|
"grad_norm": 0.3874328136444092, |
|
"learning_rate": 1.330724070450098e-06, |
|
"loss": 0.9058, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.8688845401174168, |
|
"grad_norm": 0.3811403512954712, |
|
"learning_rate": 1.3111545988258318e-06, |
|
"loss": 0.8796, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.8708414872798435, |
|
"grad_norm": 0.33906376361846924, |
|
"learning_rate": 1.2915851272015657e-06, |
|
"loss": 0.9051, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.87279843444227, |
|
"grad_norm": 0.3563789427280426, |
|
"learning_rate": 1.2720156555772995e-06, |
|
"loss": 0.8932, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.8747553816046967, |
|
"grad_norm": 0.44213926792144775, |
|
"learning_rate": 1.2524461839530334e-06, |
|
"loss": 0.8787, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.8767123287671232, |
|
"grad_norm": 0.39271780848503113, |
|
"learning_rate": 1.2328767123287673e-06, |
|
"loss": 0.8706, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.8786692759295499, |
|
"grad_norm": 0.3402375280857086, |
|
"learning_rate": 1.213307240704501e-06, |
|
"loss": 0.8404, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.8806262230919765, |
|
"grad_norm": 0.37391403317451477, |
|
"learning_rate": 1.1937377690802348e-06, |
|
"loss": 0.873, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8825831702544031, |
|
"grad_norm": 0.4681132137775421, |
|
"learning_rate": 1.1741682974559686e-06, |
|
"loss": 0.8922, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.8845401174168297, |
|
"grad_norm": 0.37245139479637146, |
|
"learning_rate": 1.1545988258317025e-06, |
|
"loss": 0.9606, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.8864970645792564, |
|
"grad_norm": 0.3633488714694977, |
|
"learning_rate": 1.1350293542074364e-06, |
|
"loss": 0.9371, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.8884540117416829, |
|
"grad_norm": 0.36568257212638855, |
|
"learning_rate": 1.1154598825831702e-06, |
|
"loss": 0.889, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.8904109589041096, |
|
"grad_norm": 0.37727871537208557, |
|
"learning_rate": 1.095890410958904e-06, |
|
"loss": 0.8863, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.8923679060665362, |
|
"grad_norm": 0.3628275990486145, |
|
"learning_rate": 1.076320939334638e-06, |
|
"loss": 0.8753, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.8943248532289628, |
|
"grad_norm": 0.5403597950935364, |
|
"learning_rate": 1.0567514677103718e-06, |
|
"loss": 0.8446, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.8962818003913894, |
|
"grad_norm": 0.37633222341537476, |
|
"learning_rate": 1.0371819960861057e-06, |
|
"loss": 0.8821, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.898238747553816, |
|
"grad_norm": 0.4256667494773865, |
|
"learning_rate": 1.0176125244618395e-06, |
|
"loss": 0.8967, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.9001956947162426, |
|
"grad_norm": 0.37082305550575256, |
|
"learning_rate": 9.980430528375734e-07, |
|
"loss": 0.8644, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9021526418786693, |
|
"grad_norm": 0.35088518261909485, |
|
"learning_rate": 9.784735812133073e-07, |
|
"loss": 0.9007, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.9041095890410958, |
|
"grad_norm": 0.37018847465515137, |
|
"learning_rate": 9.589041095890411e-07, |
|
"loss": 0.8561, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.9060665362035225, |
|
"grad_norm": 0.4181114137172699, |
|
"learning_rate": 9.39334637964775e-07, |
|
"loss": 0.9222, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.9080234833659491, |
|
"grad_norm": 0.3350118100643158, |
|
"learning_rate": 9.197651663405089e-07, |
|
"loss": 0.9061, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.9099804305283757, |
|
"grad_norm": 0.4112285077571869, |
|
"learning_rate": 9.001956947162427e-07, |
|
"loss": 0.8684, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9119373776908023, |
|
"grad_norm": 0.3795412480831146, |
|
"learning_rate": 8.806262230919766e-07, |
|
"loss": 0.8144, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.913894324853229, |
|
"grad_norm": 0.36573439836502075, |
|
"learning_rate": 8.610567514677104e-07, |
|
"loss": 0.8562, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.9158512720156555, |
|
"grad_norm": 0.5129836797714233, |
|
"learning_rate": 8.414872798434442e-07, |
|
"loss": 0.8725, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.9178082191780822, |
|
"grad_norm": 0.3448660373687744, |
|
"learning_rate": 8.219178082191781e-07, |
|
"loss": 0.9071, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.9197651663405088, |
|
"grad_norm": 0.33694183826446533, |
|
"learning_rate": 8.023483365949119e-07, |
|
"loss": 0.8779, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9217221135029354, |
|
"grad_norm": 0.4201546609401703, |
|
"learning_rate": 7.827788649706458e-07, |
|
"loss": 0.9754, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.923679060665362, |
|
"grad_norm": 0.3125755488872528, |
|
"learning_rate": 7.632093933463797e-07, |
|
"loss": 0.9204, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.9256360078277887, |
|
"grad_norm": 0.41351065039634705, |
|
"learning_rate": 7.436399217221135e-07, |
|
"loss": 0.8487, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.9275929549902152, |
|
"grad_norm": 0.3856956660747528, |
|
"learning_rate": 7.240704500978474e-07, |
|
"loss": 0.9087, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.9295499021526419, |
|
"grad_norm": 0.4000626802444458, |
|
"learning_rate": 7.045009784735812e-07, |
|
"loss": 0.8734, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9315068493150684, |
|
"grad_norm": 0.3834664523601532, |
|
"learning_rate": 6.849315068493151e-07, |
|
"loss": 0.8928, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.9334637964774951, |
|
"grad_norm": 0.36856183409690857, |
|
"learning_rate": 6.65362035225049e-07, |
|
"loss": 0.8967, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.9354207436399217, |
|
"grad_norm": 0.34125497937202454, |
|
"learning_rate": 6.457925636007828e-07, |
|
"loss": 0.8501, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.9373776908023483, |
|
"grad_norm": 0.3918203115463257, |
|
"learning_rate": 6.262230919765167e-07, |
|
"loss": 0.8277, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.9393346379647749, |
|
"grad_norm": 0.35030046105384827, |
|
"learning_rate": 6.066536203522505e-07, |
|
"loss": 0.8883, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9412915851272016, |
|
"grad_norm": 0.3345521092414856, |
|
"learning_rate": 5.870841487279843e-07, |
|
"loss": 0.9089, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.9432485322896281, |
|
"grad_norm": 0.7940770983695984, |
|
"learning_rate": 5.675146771037182e-07, |
|
"loss": 0.8447, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.9452054794520548, |
|
"grad_norm": 0.4670639634132385, |
|
"learning_rate": 5.47945205479452e-07, |
|
"loss": 0.9603, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.9471624266144814, |
|
"grad_norm": 0.35085639357566833, |
|
"learning_rate": 5.283757338551859e-07, |
|
"loss": 0.8936, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.949119373776908, |
|
"grad_norm": 0.3659544587135315, |
|
"learning_rate": 5.088062622309198e-07, |
|
"loss": 0.8489, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9510763209393346, |
|
"grad_norm": 0.37457695603370667, |
|
"learning_rate": 4.892367906066536e-07, |
|
"loss": 0.9201, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.9530332681017613, |
|
"grad_norm": 0.3387516140937805, |
|
"learning_rate": 4.696673189823875e-07, |
|
"loss": 0.8846, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.9549902152641878, |
|
"grad_norm": 0.3514867424964905, |
|
"learning_rate": 4.5009784735812136e-07, |
|
"loss": 0.9238, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.9569471624266145, |
|
"grad_norm": 0.3868323564529419, |
|
"learning_rate": 4.305283757338552e-07, |
|
"loss": 0.93, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.958904109589041, |
|
"grad_norm": 0.38455379009246826, |
|
"learning_rate": 4.1095890410958903e-07, |
|
"loss": 0.9301, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9608610567514677, |
|
"grad_norm": 0.360344797372818, |
|
"learning_rate": 3.913894324853229e-07, |
|
"loss": 0.8141, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.9628180039138943, |
|
"grad_norm": 0.3541224002838135, |
|
"learning_rate": 3.7181996086105676e-07, |
|
"loss": 0.9057, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.9647749510763209, |
|
"grad_norm": 0.35285741090774536, |
|
"learning_rate": 3.522504892367906e-07, |
|
"loss": 0.8942, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.9667318982387475, |
|
"grad_norm": 0.3489803969860077, |
|
"learning_rate": 3.326810176125245e-07, |
|
"loss": 0.9787, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.9686888454011742, |
|
"grad_norm": 0.36583074927330017, |
|
"learning_rate": 3.1311154598825835e-07, |
|
"loss": 0.8947, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9706457925636007, |
|
"grad_norm": 0.3927527964115143, |
|
"learning_rate": 2.9354207436399216e-07, |
|
"loss": 0.936, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.9726027397260274, |
|
"grad_norm": 0.37387171387672424, |
|
"learning_rate": 2.73972602739726e-07, |
|
"loss": 0.9358, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.974559686888454, |
|
"grad_norm": 0.36170729994773865, |
|
"learning_rate": 2.544031311154599e-07, |
|
"loss": 0.9211, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.9765166340508806, |
|
"grad_norm": 0.3695358633995056, |
|
"learning_rate": 2.3483365949119375e-07, |
|
"loss": 0.8574, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.9784735812133072, |
|
"grad_norm": 0.3722043037414551, |
|
"learning_rate": 2.152641878669276e-07, |
|
"loss": 0.8627, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9804305283757339, |
|
"grad_norm": 0.3411552309989929, |
|
"learning_rate": 1.9569471624266145e-07, |
|
"loss": 0.8924, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.9823874755381604, |
|
"grad_norm": 0.3667154610157013, |
|
"learning_rate": 1.761252446183953e-07, |
|
"loss": 0.9128, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.9843444227005871, |
|
"grad_norm": 0.36946728825569153, |
|
"learning_rate": 1.5655577299412917e-07, |
|
"loss": 0.8716, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.9863013698630136, |
|
"grad_norm": 0.3377256393432617, |
|
"learning_rate": 1.36986301369863e-07, |
|
"loss": 0.939, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.9882583170254403, |
|
"grad_norm": 0.3812258541584015, |
|
"learning_rate": 1.1741682974559687e-07, |
|
"loss": 0.8871, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.9902152641878669, |
|
"grad_norm": 0.41513141989707947, |
|
"learning_rate": 9.784735812133072e-08, |
|
"loss": 0.8544, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.9921722113502935, |
|
"grad_norm": 0.36569666862487793, |
|
"learning_rate": 7.827788649706459e-08, |
|
"loss": 0.9033, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.9941291585127201, |
|
"grad_norm": 0.36549660563468933, |
|
"learning_rate": 5.870841487279844e-08, |
|
"loss": 0.8103, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.9960861056751468, |
|
"grad_norm": 0.3560737073421478, |
|
"learning_rate": 3.9138943248532294e-08, |
|
"loss": 0.8832, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.9980430528375733, |
|
"grad_norm": 0.35010769963264465, |
|
"learning_rate": 1.9569471624266147e-08, |
|
"loss": 0.8588, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.3699730932712555, |
|
"learning_rate": 0.0, |
|
"loss": 0.9031, |
|
"step": 511 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 511, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.8009679088687514e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|