{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1109,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0009017132551848512, "grad_norm": 10.577064169641238, "learning_rate": 2.9411764705882356e-07, "loss": 1.9986, "step": 1 },
    { "epoch": 0.0018034265103697023, "grad_norm": 10.36517170209171, "learning_rate": 5.882352941176471e-07, "loss": 1.9534, "step": 2 },
    { "epoch": 0.002705139765554554, "grad_norm": 10.44504678164916, "learning_rate": 8.823529411764707e-07, "loss": 1.9749, "step": 3 },
    { "epoch": 0.0036068530207394047, "grad_norm": 10.331516165460968, "learning_rate": 1.1764705882352942e-06, "loss": 1.9862, "step": 4 },
    { "epoch": 0.004508566275924256, "grad_norm": 8.65995855008459, "learning_rate": 1.4705882352941177e-06, "loss": 1.9184, "step": 5 },
    { "epoch": 0.005410279531109108, "grad_norm": 7.008907681884329, "learning_rate": 1.7647058823529414e-06, "loss": 1.8906, "step": 6 },
    { "epoch": 0.0063119927862939585, "grad_norm": 4.835169025945767, "learning_rate": 2.058823529411765e-06, "loss": 1.8871, "step": 7 },
    { "epoch": 0.007213706041478809, "grad_norm": 3.723351018265124, "learning_rate": 2.3529411764705885e-06, "loss": 1.8672, "step": 8 },
    { "epoch": 0.008115419296663661, "grad_norm": 3.2896828301664245, "learning_rate": 2.647058823529412e-06, "loss": 1.8221, "step": 9 },
    { "epoch": 0.009017132551848512, "grad_norm": 3.997777920263939, "learning_rate": 2.9411764705882355e-06, "loss": 1.7994, "step": 10 },
    { "epoch": 0.009918845807033363, "grad_norm": 4.007279147531519, "learning_rate": 3.2352941176470594e-06, "loss": 1.8035, "step": 11 },
    { "epoch": 0.010820559062218215, "grad_norm": 3.660990251525157, "learning_rate": 3.529411764705883e-06, "loss": 1.767, "step": 12 },
    { "epoch": 0.011722272317403066, "grad_norm": 3.3885848000748386, "learning_rate": 3.8235294117647055e-06, "loss": 1.7675, "step": 13 },
    { "epoch": 0.012623985572587917, "grad_norm": 3.291223708706854, "learning_rate": 4.11764705882353e-06, "loss": 1.7637, "step": 14 },
    { "epoch": 0.013525698827772768, "grad_norm": 3.1417473603402217, "learning_rate": 4.411764705882353e-06, "loss": 1.7701, "step": 15 },
    { "epoch": 0.014427412082957619, "grad_norm": 2.763025617498974, "learning_rate": 4.705882352941177e-06, "loss": 1.7316, "step": 16 },
    { "epoch": 0.015329125338142471, "grad_norm": 2.3886750440304483, "learning_rate": 5e-06, "loss": 1.7236, "step": 17 },
    { "epoch": 0.016230838593327322, "grad_norm": 2.34515652847235, "learning_rate": 5.294117647058824e-06, "loss": 1.7138, "step": 18 },
    { "epoch": 0.017132551848512173, "grad_norm": 2.3271436396781393, "learning_rate": 5.588235294117647e-06, "loss": 1.7127, "step": 19 },
    { "epoch": 0.018034265103697024, "grad_norm": 2.295163831092699, "learning_rate": 5.882352941176471e-06, "loss": 1.6975, "step": 20 },
    { "epoch": 0.018935978358881875, "grad_norm": 2.114400590233431, "learning_rate": 6.176470588235295e-06, "loss": 1.6654, "step": 21 },
    { "epoch": 0.019837691614066726, "grad_norm": 1.9750708257462024, "learning_rate": 6.470588235294119e-06, "loss": 1.6869, "step": 22 },
    { "epoch": 0.020739404869251576, "grad_norm": 1.6817036959999778, "learning_rate": 6.764705882352942e-06, "loss": 1.6651, "step": 23 },
    { "epoch": 0.02164111812443643, "grad_norm": 1.6450355892304562, "learning_rate": 7.058823529411766e-06, "loss": 1.664, "step": 24 },
    { "epoch": 0.02254283137962128, "grad_norm": 1.7120938879923542, "learning_rate": 7.352941176470589e-06, "loss": 1.6574, "step": 25 },
    { "epoch": 0.023444544634806132, "grad_norm": 1.8550225833518525, "learning_rate": 7.647058823529411e-06, "loss": 1.6281, "step": 26 },
    { "epoch": 0.024346257889990983, "grad_norm": 1.9221800970494332, "learning_rate": 7.941176470588236e-06, "loss": 1.6377, "step": 27 },
    { "epoch": 0.025247971145175834, "grad_norm": 1.6995797824544692, "learning_rate": 8.23529411764706e-06, "loss": 1.602, "step": 28 },
    { "epoch": 0.026149684400360685, "grad_norm": 1.6358357311194014, "learning_rate": 8.529411764705883e-06, "loss": 1.6061, "step": 29 },
    { "epoch": 0.027051397655545536, "grad_norm": 1.580651479451095, "learning_rate": 8.823529411764707e-06, "loss": 1.6169, "step": 30 },
    { "epoch": 0.027953110910730387, "grad_norm": 1.5756107806972501, "learning_rate": 9.11764705882353e-06, "loss": 1.5969, "step": 31 },
    { "epoch": 0.028854824165915238, "grad_norm": 1.633779158193918, "learning_rate": 9.411764705882354e-06, "loss": 1.6143, "step": 32 },
    { "epoch": 0.029756537421100092, "grad_norm": 1.719962101136898, "learning_rate": 9.705882352941177e-06, "loss": 1.5881, "step": 33 },
    { "epoch": 0.030658250676284943, "grad_norm": 1.6017616780387625, "learning_rate": 1e-05, "loss": 1.5739, "step": 34 },
    { "epoch": 0.031559963931469794, "grad_norm": 1.5450519445519328, "learning_rate": 9.999978648788802e-06, "loss": 1.5829, "step": 35 },
    { "epoch": 0.032461677186654644, "grad_norm": 1.4834472866238042, "learning_rate": 9.999914595337555e-06, "loss": 1.569, "step": 36 },
    { "epoch": 0.033363390441839495, "grad_norm": 1.498305342680757, "learning_rate": 9.999807840193305e-06, "loss": 1.5653, "step": 37 },
    { "epoch": 0.034265103697024346, "grad_norm": 1.5463338086484935, "learning_rate": 9.999658384267795e-06, "loss": 1.562, "step": 38 },
    { "epoch": 0.0351668169522092, "grad_norm": 1.459365986133512, "learning_rate": 9.999466228837452e-06, "loss": 1.5585, "step": 39 },
    { "epoch": 0.03606853020739405, "grad_norm": 1.4693046149427762, "learning_rate": 9.999231375543374e-06, "loss": 1.5211, "step": 40 },
    { "epoch": 0.0369702434625789, "grad_norm": 1.4984282166549738, "learning_rate": 9.998953826391322e-06, "loss": 1.5367, "step": 41 },
    { "epoch": 0.03787195671776375, "grad_norm": 1.4384393247143472, "learning_rate": 9.998633583751702e-06, "loss": 1.5337, "step": 42 },
    { "epoch": 0.0387736699729486, "grad_norm": 1.3711548840775858, "learning_rate": 9.99827065035954e-06, "loss": 1.5185, "step": 43 },
    { "epoch": 0.03967538322813345, "grad_norm": 1.4277933526776405, "learning_rate": 9.997865029314464e-06, "loss": 1.5269, "step": 44 },
    { "epoch": 0.0405770964833183, "grad_norm": 1.396702454558118, "learning_rate": 9.997416724080673e-06, "loss": 1.485, "step": 45 },
    { "epoch": 0.04147880973850315, "grad_norm": 1.47668068877586, "learning_rate": 9.996925738486913e-06, "loss": 1.5259, "step": 46 },
    { "epoch": 0.04238052299368801, "grad_norm": 1.4557526770144735, "learning_rate": 9.996392076726436e-06, "loss": 1.5188, "step": 47 },
    { "epoch": 0.04328223624887286, "grad_norm": 1.4234416876774554, "learning_rate": 9.995815743356973e-06, "loss": 1.5014, "step": 48 },
    { "epoch": 0.04418394950405771, "grad_norm": 1.3986193326213034, "learning_rate": 9.995196743300693e-06, "loss": 1.4924, "step": 49 },
    { "epoch": 0.04508566275924256, "grad_norm": 1.45785760531205, "learning_rate": 9.994535081844152e-06, "loss": 1.5302, "step": 50 },
    { "epoch": 0.045987376014427414, "grad_norm": 1.4363542918984882, "learning_rate": 9.993830764638262e-06, "loss": 1.4886, "step": 51 },
    { "epoch": 0.046889089269612265, "grad_norm": 1.4273053464550627, "learning_rate": 9.993083797698231e-06, "loss": 1.4899, "step": 52 },
    { "epoch": 0.047790802524797116, "grad_norm": 1.348045620568428, "learning_rate": 9.992294187403522e-06, "loss": 1.496, "step": 53 },
    { "epoch": 0.04869251577998197, "grad_norm": 1.4233792162905572, "learning_rate": 9.991461940497786e-06, "loss": 1.4764, "step": 54 },
    { "epoch": 0.04959422903516682, "grad_norm": 1.446283496903928, "learning_rate": 9.990587064088817e-06, "loss": 1.5144, "step": 55 },
    { "epoch": 0.05049594229035167, "grad_norm": 1.4413266921077565, "learning_rate": 9.989669565648484e-06, "loss": 1.4634, "step": 56 },
    { "epoch": 0.05139765554553652, "grad_norm": 1.3732602138003445, "learning_rate": 9.988709453012664e-06, "loss": 1.5038, "step": 57 },
    { "epoch": 0.05229936880072137, "grad_norm": 1.4130255699072003, "learning_rate": 9.987706734381188e-06, "loss": 1.4809, "step": 58 },
    { "epoch": 0.05320108205590622, "grad_norm": 1.5087152986620616, "learning_rate": 9.986661418317759e-06, "loss": 1.4618, "step": 59 },
    { "epoch": 0.05410279531109107, "grad_norm": 1.4798629422697944, "learning_rate": 9.985573513749881e-06, "loss": 1.477, "step": 60 },
    { "epoch": 0.05500450856627592, "grad_norm": 1.3606687996874915, "learning_rate": 9.984443029968786e-06, "loss": 1.4612, "step": 61 },
    { "epoch": 0.05590622182146077, "grad_norm": 1.4899050929180402, "learning_rate": 9.983269976629356e-06, "loss": 1.4826, "step": 62 },
    { "epoch": 0.056807935076645624, "grad_norm": 1.432893510375255, "learning_rate": 9.982054363750028e-06, "loss": 1.4537, "step": 63 },
    { "epoch": 0.057709648331830475, "grad_norm": 1.3576314165902383, "learning_rate": 9.980796201712734e-06, "loss": 1.4661, "step": 64 },
    { "epoch": 0.058611361587015326, "grad_norm": 1.3587575875659574, "learning_rate": 9.979495501262781e-06, "loss": 1.4677, "step": 65 },
    { "epoch": 0.059513074842200184, "grad_norm": 1.346258372969497, "learning_rate": 9.978152273508783e-06, "loss": 1.4545, "step": 66 },
    { "epoch": 0.060414788097385035, "grad_norm": 1.4228289723340597, "learning_rate": 9.976766529922556e-06, "loss": 1.4624, "step": 67 },
    { "epoch": 0.061316501352569885, "grad_norm": 1.3706888516420923, "learning_rate": 9.97533828233902e-06, "loss": 1.4849, "step": 68 },
    { "epoch": 0.062218214607754736, "grad_norm": 1.3370859842228668, "learning_rate": 9.973867542956104e-06, "loss": 1.4578, "step": 69 },
    { "epoch": 0.06311992786293959, "grad_norm": 1.4029903761261626, "learning_rate": 9.972354324334633e-06, "loss": 1.4526, "step": 70 },
    { "epoch": 0.06402164111812443, "grad_norm": 1.356224319416608, "learning_rate": 9.970798639398228e-06, "loss": 1.4337, "step": 71 },
    { "epoch": 0.06492335437330929, "grad_norm": 1.4780697718410634, "learning_rate": 9.969200501433192e-06, "loss": 1.4494, "step": 72 },
    { "epoch": 0.06582506762849413, "grad_norm": 1.3463132650913565, "learning_rate": 9.967559924088395e-06, "loss": 1.4364, "step": 73 },
    { "epoch": 0.06672678088367899, "grad_norm": 1.3743615711298545, "learning_rate": 9.965876921375165e-06, "loss": 1.4429, "step": 74 },
    { "epoch": 0.06762849413886383, "grad_norm": 1.38909407325101, "learning_rate": 9.964151507667162e-06, "loss": 1.46, "step": 75 },
    { "epoch": 0.06853020739404869, "grad_norm": 1.3866587453202093, "learning_rate": 9.962383697700252e-06, "loss": 1.4517, "step": 76 },
    { "epoch": 0.06943192064923355, "grad_norm": 1.3624396855314103, "learning_rate": 9.960573506572391e-06, "loss": 1.4366, "step": 77 },
    { "epoch": 0.0703336339044184, "grad_norm": 1.4528468294670056, "learning_rate": 9.958720949743485e-06, "loss": 1.4529, "step": 78 },
    { "epoch": 0.07123534715960325, "grad_norm": 1.3751194277907128, "learning_rate": 9.956826043035268e-06, "loss": 1.4159, "step": 79 },
    { "epoch": 0.0721370604147881, "grad_norm": 1.4929981371791885, "learning_rate": 9.954888802631164e-06, "loss": 1.431, "step": 80 },
    { "epoch": 0.07303877366997295, "grad_norm": 1.44904285843575, "learning_rate": 9.952909245076141e-06, "loss": 1.4405, "step": 81 },
    { "epoch": 0.0739404869251578, "grad_norm": 1.389639089486905, "learning_rate": 9.950887387276582e-06, "loss": 1.4687, "step": 82 },
    { "epoch": 0.07484220018034266, "grad_norm": 1.4392438942511672, "learning_rate": 9.948823246500132e-06, "loss": 1.454, "step": 83 },
    { "epoch": 0.0757439134355275, "grad_norm": 1.3544399783536702, "learning_rate": 9.946716840375552e-06, "loss": 1.4374, "step": 84 },
    { "epoch": 0.07664562669071236, "grad_norm": 1.3558114162311536, "learning_rate": 9.944568186892572e-06, "loss": 1.4245, "step": 85 },
    { "epoch": 0.0775473399458972, "grad_norm": 1.3558935909615983, "learning_rate": 9.94237730440173e-06, "loss": 1.4482, "step": 86 },
    { "epoch": 0.07844905320108206, "grad_norm": 1.4002193410756965, "learning_rate": 9.940144211614231e-06, "loss": 1.4092, "step": 87 },
    { "epoch": 0.0793507664562669, "grad_norm": 1.4084436792953672, "learning_rate": 9.937868927601765e-06, "loss": 1.455, "step": 88 },
    { "epoch": 0.08025247971145176, "grad_norm": 1.507546442338142, "learning_rate": 9.935551471796358e-06, "loss": 1.4109, "step": 89 },
    { "epoch": 0.0811541929666366, "grad_norm": 1.3621915852437079, "learning_rate": 9.93319186399021e-06, "loss": 1.4119, "step": 90 },
    { "epoch": 0.08205590622182146, "grad_norm": 1.4680244963224889, "learning_rate": 9.930790124335511e-06, "loss": 1.4164, "step": 91 },
    { "epoch": 0.0829576194770063, "grad_norm": 1.5004514234560973, "learning_rate": 9.928346273344283e-06, "loss": 1.4236, "step": 92 },
    { "epoch": 0.08385933273219116, "grad_norm": 1.3932901143805643, "learning_rate": 9.925860331888197e-06, "loss": 1.4134, "step": 93 },
    { "epoch": 0.08476104598737602, "grad_norm": 1.4024872552349996, "learning_rate": 9.923332321198396e-06, "loss": 1.4297, "step": 94 },
    { "epoch": 0.08566275924256087, "grad_norm": 1.4468222192078053, "learning_rate": 9.92076226286532e-06, "loss": 1.4238, "step": 95 },
    { "epoch": 0.08656447249774572, "grad_norm": 1.434206641991036, "learning_rate": 9.918150178838509e-06, "loss": 1.4353, "step": 96 },
    { "epoch": 0.08746618575293057, "grad_norm": 1.4452250943034404, "learning_rate": 9.915496091426425e-06, "loss": 1.4128, "step": 97 },
    { "epoch": 0.08836789900811542, "grad_norm": 1.4487965090694912, "learning_rate": 9.912800023296263e-06, "loss": 1.3926, "step": 98 },
    { "epoch": 0.08926961226330027, "grad_norm": 1.465430632187407, "learning_rate": 9.910061997473753e-06, "loss": 1.3985, "step": 99 },
    { "epoch": 0.09017132551848513, "grad_norm": 1.450793459097913, "learning_rate": 9.907282037342957e-06, "loss": 1.405, "step": 100 },
    { "epoch": 0.09107303877366997, "grad_norm": 1.4155012864644325, "learning_rate": 9.904460166646084e-06, "loss": 1.4281, "step": 101 },
    { "epoch": 0.09197475202885483, "grad_norm": 1.4146435969702247, "learning_rate": 9.901596409483277e-06, "loss": 1.4252, "step": 102 },
    { "epoch": 0.09287646528403967, "grad_norm": 1.4129494426948097, "learning_rate": 9.898690790312409e-06, "loss": 1.3925, "step": 103 },
    { "epoch": 0.09377817853922453, "grad_norm": 1.391884326471153, "learning_rate": 9.895743333948875e-06, "loss": 1.374, "step": 104 },
    { "epoch": 0.09467989179440937, "grad_norm": 1.4266801593025809, "learning_rate": 9.892754065565382e-06, "loss": 1.3885, "step": 105 },
    { "epoch": 0.09558160504959423, "grad_norm": 1.4783691046465195, "learning_rate": 9.88972301069173e-06, "loss": 1.43, "step": 106 },
    { "epoch": 0.09648331830477908, "grad_norm": 1.4381943632103706, "learning_rate": 9.886650195214594e-06, "loss": 1.407, "step": 107 },
    { "epoch": 0.09738503155996393, "grad_norm": 1.4774136648375966, "learning_rate": 9.883535645377307e-06, "loss": 1.4126, "step": 108 },
    { "epoch": 0.09828674481514878, "grad_norm": 1.4848585593390986, "learning_rate": 9.880379387779637e-06, "loss": 1.4301, "step": 109 },
    { "epoch": 0.09918845807033363, "grad_norm": 1.3447046383888597, "learning_rate": 9.877181449377549e-06, "loss": 1.4095, "step": 110 },
    { "epoch": 0.10009017132551848, "grad_norm": 1.485125665692246, "learning_rate": 9.873941857482988e-06, "loss": 1.3941, "step": 111 },
    { "epoch": 0.10099188458070334, "grad_norm": 1.4386601202642741, "learning_rate": 9.87066063976364e-06, "loss": 1.3867, "step": 112 },
    { "epoch": 0.1018935978358882, "grad_norm": 1.4124995951533683, "learning_rate": 9.867337824242691e-06, "loss": 1.3913, "step": 113 },
    { "epoch": 0.10279531109107304, "grad_norm": 1.3851353292060657, "learning_rate": 9.863973439298597e-06, "loss": 1.4185, "step": 114 },
    { "epoch": 0.1036970243462579, "grad_norm": 1.3931533260228668, "learning_rate": 9.860567513664836e-06, "loss": 1.4086, "step": 115 },
    { "epoch": 0.10459873760144274, "grad_norm": 1.4179784405117548, "learning_rate": 9.857120076429662e-06, "loss": 1.4144, "step": 116 },
    { "epoch": 0.1055004508566276, "grad_norm": 1.428702853117983, "learning_rate": 9.85363115703586e-06, "loss": 1.3668, "step": 117 },
    { "epoch": 0.10640216411181244, "grad_norm": 1.3577671717067978, "learning_rate": 9.85010078528049e-06, "loss": 1.4005, "step": 118 },
    { "epoch": 0.1073038773669973, "grad_norm": 1.429587957434509, "learning_rate": 9.846528991314638e-06, "loss": 1.4016, "step": 119 },
    { "epoch": 0.10820559062218214, "grad_norm": 1.4148608462310461, "learning_rate": 9.842915805643156e-06, "loss": 1.3833, "step": 120 },
    { "epoch": 0.109107303877367, "grad_norm": 1.4243012408251199, "learning_rate": 9.8392612591244e-06, "loss": 1.398, "step": 121 },
    { "epoch": 0.11000901713255185, "grad_norm": 1.3830341629731753, "learning_rate": 9.835565382969967e-06, "loss": 1.3933, "step": 122 },
    { "epoch": 0.1109107303877367, "grad_norm": 1.3566631053070333, "learning_rate": 9.83182820874443e-06, "loss": 1.356, "step": 123 },
    { "epoch": 0.11181244364292155, "grad_norm": 1.4976837799309841, "learning_rate": 9.82804976836507e-06, "loss": 1.3716, "step": 124 },
    { "epoch": 0.1127141568981064, "grad_norm": 1.5201556480768976, "learning_rate": 9.824230094101591e-06, "loss": 1.4088, "step": 125 },
    { "epoch": 0.11361587015329125, "grad_norm": 1.4654780557555434, "learning_rate": 9.820369218575871e-06, "loss": 1.3733, "step": 126 },
    { "epoch": 0.1145175834084761, "grad_norm": 1.421025453696537, "learning_rate": 9.816467174761655e-06, "loss": 1.3962, "step": 127 },
    { "epoch": 0.11541929666366095, "grad_norm": 1.4262157083025124, "learning_rate": 9.812523995984281e-06, "loss": 1.3729, "step": 128 },
    { "epoch": 0.11632100991884581, "grad_norm": 1.495933174346428, "learning_rate": 9.808539715920415e-06, "loss": 1.4102, "step": 129 },
    { "epoch": 0.11722272317403065, "grad_norm": 1.4162668123468176, "learning_rate": 9.804514368597735e-06, "loss": 1.3732, "step": 130 },
    { "epoch": 0.11812443642921551, "grad_norm": 1.4056884823900608, "learning_rate": 9.800447988394657e-06, "loss": 1.4001, "step": 131 },
    { "epoch": 0.11902614968440037, "grad_norm": 1.379636688570927, "learning_rate": 9.79634061004004e-06, "loss": 1.3874, "step": 132 },
    { "epoch": 0.11992786293958521, "grad_norm": 1.3822580890806864, "learning_rate": 9.792192268612881e-06, "loss": 1.3586, "step": 133 },
    { "epoch": 0.12082957619477007, "grad_norm": 1.382134945197591, "learning_rate": 9.78800299954203e-06, "loss": 1.4071, "step": 134 },
    { "epoch": 0.12173128944995491, "grad_norm": 1.4059077728114613, "learning_rate": 9.783772838605874e-06, "loss": 1.3829, "step": 135 },
    { "epoch": 0.12263300270513977, "grad_norm": 1.4279808755935588, "learning_rate": 9.779501821932033e-06, "loss": 1.4187, "step": 136 },
    { "epoch": 0.12353471596032461, "grad_norm": 1.3435404866724177, "learning_rate": 9.775189985997062e-06, "loss": 1.391, "step": 137 },
    { "epoch": 0.12443642921550947, "grad_norm": 1.3484226031400397, "learning_rate": 9.770837367626129e-06, "loss": 1.3655, "step": 138 },
    { "epoch": 0.12533814247069433, "grad_norm": 1.4154141732809218, "learning_rate": 9.766444003992704e-06, "loss": 1.3935, "step": 139 },
    { "epoch": 0.12623985572587917, "grad_norm": 1.3582775740218958, "learning_rate": 9.762009932618237e-06, "loss": 1.3836, "step": 140 },
    { "epoch": 0.12714156898106402, "grad_norm": 1.4019326999739066, "learning_rate": 9.75753519137185e-06, "loss": 1.3656, "step": 141 },
    { "epoch": 0.12804328223624886, "grad_norm": 1.3873034739629564, "learning_rate": 9.753019818469998e-06, "loss": 1.3783, "step": 142 },
    { "epoch": 0.12894499549143373, "grad_norm": 1.3402006066598218, "learning_rate": 9.748463852476156e-06, "loss": 1.3687, "step": 143 },
    { "epoch": 0.12984670874661858, "grad_norm": 1.468790905251283, "learning_rate": 9.743867332300478e-06, "loss": 1.3896, "step": 144 },
    { "epoch": 0.13074842200180342, "grad_norm": 1.3625578204301965, "learning_rate": 9.739230297199477e-06, "loss": 1.3888, "step": 145 },
    { "epoch": 0.13165013525698827, "grad_norm": 1.4208683043924826, "learning_rate": 9.734552786775678e-06, "loss": 1.3664, "step": 146 },
    { "epoch": 0.13255184851217314, "grad_norm": 1.4500142087444388, "learning_rate": 9.729834840977284e-06, "loss": 1.3982, "step": 147 },
    { "epoch": 0.13345356176735798, "grad_norm": 1.436842860684569, "learning_rate": 9.72507650009784e-06, "loss": 1.3604, "step": 148 },
    { "epoch": 0.13435527502254282, "grad_norm": 1.3701784757913484, "learning_rate": 9.720277804775879e-06, "loss": 1.3466, "step": 149 },
    { "epoch": 0.13525698827772767, "grad_norm": 1.4124491632817213, "learning_rate": 9.715438795994587e-06, "loss": 1.3636, "step": 150 },
    { "epoch": 0.13615870153291254, "grad_norm": 1.456826333942723, "learning_rate": 9.710559515081446e-06, "loss": 1.3634, "step": 151 },
    { "epoch": 0.13706041478809738, "grad_norm": 1.412896803942778, "learning_rate": 9.705640003707873e-06, "loss": 1.382, "step": 152 },
    { "epoch": 0.13796212804328223, "grad_norm": 1.484485406644004, "learning_rate": 9.700680303888883e-06, "loss": 1.3983, "step": 153 },
    { "epoch": 0.1388638412984671, "grad_norm": 1.4513023024553309, "learning_rate": 9.695680457982713e-06, "loss": 1.3747, "step": 154 },
    { "epoch": 0.13976555455365194, "grad_norm": 1.4425274167979576, "learning_rate": 9.69064050869047e-06, "loss": 1.3836, "step": 155 },
    { "epoch": 0.1406672678088368, "grad_norm": 1.4223525469811833, "learning_rate": 9.685560499055764e-06, "loss": 1.3659, "step": 156 },
    { "epoch": 0.14156898106402163, "grad_norm": 1.385031691152652, "learning_rate": 9.680440472464337e-06, "loss": 1.3549, "step": 157 },
    { "epoch": 0.1424706943192065, "grad_norm": 1.4266749431487284, "learning_rate": 9.675280472643696e-06, "loss": 1.3661, "step": 158 },
    { "epoch": 0.14337240757439135, "grad_norm": 1.5012666041389382, "learning_rate": 9.670080543662742e-06, "loss": 1.3752, "step": 159 },
    { "epoch": 0.1442741208295762, "grad_norm": 1.415739936413478, "learning_rate": 9.664840729931385e-06, "loss": 1.3805, "step": 160 },
    { "epoch": 0.14517583408476104, "grad_norm": 1.4181819218823457, "learning_rate": 9.659561076200173e-06, "loss": 1.3884, "step": 161 },
    { "epoch": 0.1460775473399459, "grad_norm": 1.3719905980017162, "learning_rate": 9.654241627559908e-06, "loss": 1.3512, "step": 162 },
    { "epoch": 0.14697926059513075, "grad_norm": 1.4212733014049073, "learning_rate": 9.648882429441258e-06, "loss": 1.3587, "step": 163 },
    { "epoch": 0.1478809738503156, "grad_norm": 1.4127423687960647, "learning_rate": 9.643483527614372e-06, "loss": 1.3593, "step": 164 },
    { "epoch": 0.14878268710550044, "grad_norm": 1.4008058963023071, "learning_rate": 9.638044968188486e-06, "loss": 1.375, "step": 165 },
    { "epoch": 0.1496844003606853, "grad_norm": 1.382227169874824, "learning_rate": 9.632566797611535e-06, "loss": 1.3601, "step": 166 },
    { "epoch": 0.15058611361587015, "grad_norm": 1.3915418749349733, "learning_rate": 9.627049062669747e-06, "loss": 1.3595, "step": 167 },
    { "epoch": 0.151487826871055, "grad_norm": 1.408864080362016, "learning_rate": 9.621491810487251e-06, "loss": 1.367, "step": 168 },
    { "epoch": 0.15238954012623984, "grad_norm": 1.4146808141780156, "learning_rate": 9.615895088525677e-06, "loss": 1.3566, "step": 169 },
    { "epoch": 0.1532912533814247, "grad_norm": 1.3902356321346545, "learning_rate": 9.61025894458374e-06, "loss": 1.3764, "step": 170 },
    { "epoch": 0.15419296663660956, "grad_norm": 1.3597088612356067, "learning_rate": 9.604583426796837e-06, "loss": 1.351, "step": 171 },
    { "epoch": 0.1550946798917944, "grad_norm": 1.3720474596763996, "learning_rate": 9.598868583636644e-06, "loss": 1.3824, "step": 172 },
    { "epoch": 0.15599639314697927, "grad_norm": 1.3707229743231295, "learning_rate": 9.593114463910687e-06, "loss": 1.367, "step": 173 },
    { "epoch": 0.15689810640216412, "grad_norm": 1.3725553676605047, "learning_rate": 9.587321116761938e-06, "loss": 1.3599, "step": 174 },
    { "epoch": 0.15779981965734896, "grad_norm": 1.4142122379930755, "learning_rate": 9.581488591668389e-06, "loss": 1.3453, "step": 175 },
    { "epoch": 0.1587015329125338, "grad_norm": 1.3536864485589797, "learning_rate": 9.57561693844263e-06, "loss": 1.3353, "step": 176 },
    { "epoch": 0.15960324616771868, "grad_norm": 1.4132800800716323, "learning_rate": 9.56970620723142e-06, "loss": 1.3537, "step": 177 },
    { "epoch": 0.16050495942290352, "grad_norm": 1.3587637930643957, "learning_rate": 9.563756448515273e-06, "loss": 1.3526, "step": 178 },
    { "epoch": 0.16140667267808836, "grad_norm": 1.3765918418070524, "learning_rate": 9.557767713108009e-06, "loss": 1.3452, "step": 179 },
    { "epoch": 0.1623083859332732, "grad_norm": 1.3475505521784306, "learning_rate": 9.551740052156326e-06, "loss": 1.3572, "step": 180 },
    { "epoch": 0.16321009918845808, "grad_norm": 1.4357564962345402, "learning_rate": 9.545673517139376e-06, "loss": 1.3636, "step": 181 },
    { "epoch": 0.16411181244364292, "grad_norm": 1.4697976472825107, "learning_rate": 9.5395681598683e-06, "loss": 1.3441, "step": 182 },
    { "epoch": 0.16501352569882777, "grad_norm": 1.4148432779008302, "learning_rate": 9.533424032485812e-06, "loss": 1.3691, "step": 183 },
    { "epoch": 0.1659152389540126, "grad_norm": 1.459319873185255, "learning_rate": 9.527241187465735e-06, "loss": 1.3249, "step": 184 },
    { "epoch": 0.16681695220919748, "grad_norm": 1.3570335357652492, "learning_rate": 9.521019677612559e-06, "loss": 1.3674, "step": 185 },
    { "epoch": 0.16771866546438233, "grad_norm": 1.3486828991979471, "learning_rate": 9.514759556060996e-06, "loss": 1.3375, "step": 186 },
    { "epoch": 0.16862037871956717, "grad_norm": 1.373345392575501, "learning_rate": 9.508460876275514e-06, "loss": 1.3231, "step": 187 },
    { "epoch": 0.16952209197475204, "grad_norm": 1.3929600168838754, "learning_rate": 9.502123692049889e-06, "loss": 1.3471, "step": 188 },
    { "epoch": 0.1704238052299369, "grad_norm": 1.4193281036609189, "learning_rate": 9.49574805750675e-06, "loss": 1.3619, "step": 189 },
    { "epoch": 0.17132551848512173, "grad_norm": 1.3910185797527803, "learning_rate": 9.4893340270971e-06, "loss": 1.3498, "step": 190 },
    { "epoch": 0.17222723174030657, "grad_norm": 1.3411529365941561, "learning_rate": 9.482881655599867e-06, "loss": 1.363, "step": 191 },
    { "epoch": 0.17312894499549145, "grad_norm": 1.3530788239084923, "learning_rate": 9.47639099812143e-06, "loss": 1.3447, "step": 192 },
    { "epoch": 0.1740306582506763, "grad_norm": 1.4060276890862744, "learning_rate": 9.46986211009515e-06, "loss": 1.3603, "step": 193 },
    { "epoch": 0.17493237150586113, "grad_norm": 1.4002742616983794, "learning_rate": 9.463295047280892e-06, "loss": 1.325, "step": 194 },
    { "epoch": 0.17583408476104598, "grad_norm": 1.4079001802402094, "learning_rate": 9.456689865764554e-06, "loss": 1.3732, "step": 195 },
    { "epoch": 0.17673579801623085, "grad_norm": 1.366324818080461, "learning_rate": 9.450046621957587e-06, "loss": 1.3497, "step": 196 },
    { "epoch": 0.1776375112714157, "grad_norm": 1.366857559507007, "learning_rate": 9.443365372596511e-06, "loss": 1.3287, "step": 197 },
    { "epoch": 0.17853922452660054, "grad_norm": 1.3873422124784134, "learning_rate": 9.436646174742432e-06, "loss": 1.341, "step": 198 },
    { "epoch": 0.17944093778178538, "grad_norm": 1.3742935051526575, "learning_rate": 9.429889085780559e-06, "loss": 1.3247, "step": 199 },
    { "epoch": 0.18034265103697025, "grad_norm": 1.4007870712786872, "learning_rate": 9.4230941634197e-06, "loss": 1.3604, "step": 200 },
    { "epoch": 0.1812443642921551, "grad_norm": 1.340061281395059, "learning_rate": 9.416261465691786e-06, "loss": 1.3594, "step": 201 },
    { "epoch": 0.18214607754733994, "grad_norm": 1.4279648538396195, "learning_rate": 9.409391050951367e-06, "loss": 1.3556, "step": 202 },
    { "epoch": 0.18304779080252478, "grad_norm": 1.3474825489077324, "learning_rate": 9.402482977875112e-06, "loss": 1.3348, "step": 203 },
    { "epoch": 0.18394950405770966, "grad_norm": 1.3021713820720349, "learning_rate": 9.395537305461312e-06, "loss": 1.3372, "step": 204 },
    { "epoch": 0.1848512173128945, "grad_norm": 1.3439292199743982, "learning_rate": 9.388554093029376e-06, "loss": 1.3539, "step": 205 },
    { "epoch": 0.18575293056807934, "grad_norm": 1.3572209464576004, "learning_rate": 9.381533400219319e-06, "loss": 1.3227, "step": 206 },
    { "epoch": 0.18665464382326422, "grad_norm": 1.3727277388728627, "learning_rate": 9.37447528699126e-06, "loss": 1.3767, "step": 207 },
    { "epoch": 0.18755635707844906, "grad_norm": 1.3897163176087035, "learning_rate": 9.367379813624908e-06, "loss": 1.3304, "step": 208 },
    { "epoch": 0.1884580703336339, "grad_norm": 1.3659826029945907, "learning_rate": 9.36024704071904e-06, "loss": 1.3495, "step": 209 },
    { "epoch": 0.18935978358881875, "grad_norm": 1.4281647829676292, "learning_rate": 9.35307702919099e-06, "loss": 1.3315, "step": 210 },
    { "epoch": 0.19026149684400362, "grad_norm": 1.425082817286243, "learning_rate": 9.345869840276138e-06, "loss": 1.3374, "step": 211 },
    { "epoch": 0.19116321009918846, "grad_norm": 1.3669686996346657, "learning_rate": 9.338625535527363e-06, "loss": 1.329, "step": 212 },
    { "epoch": 0.1920649233543733, "grad_norm": 1.4493780187902503, "learning_rate": 9.331344176814537e-06, "loss": 1.3309, "step": 213 },
    { "epoch": 0.19296663660955815, "grad_norm": 1.3984652860472455, "learning_rate": 9.324025826323995e-06, "loss": 1.3447, "step": 214 },
    { "epoch": 0.19386834986474302, "grad_norm": 1.3758656367160043, "learning_rate": 9.316670546557994e-06, "loss": 1.3339, "step": 215 },
    { "epoch": 0.19477006311992787, "grad_norm": 1.3483825039725506, "learning_rate": 9.309278400334184e-06, "loss": 1.329, "step": 216 },
    { "epoch": 0.1956717763751127, "grad_norm": 1.4046844670196472, "learning_rate": 9.301849450785077e-06, "loss": 1.3239, "step": 217 },
    { "epoch": 0.19657348963029755, "grad_norm": 1.378369632031669, "learning_rate": 9.294383761357503e-06, "loss": 1.3293, "step": 218 },
    { "epoch": 0.19747520288548243, "grad_norm": 1.390583412942, "learning_rate": 9.286881395812066e-06, "loss": 1.3583, "step": 219 },
    { "epoch": 0.19837691614066727, "grad_norm": 1.3569691678927214, "learning_rate": 9.279342418222602e-06, "loss": 1.3416, "step": 220 },
    { "epoch": 0.1992786293958521, "grad_norm": 1.5011309111070126, "learning_rate": 9.271766892975632e-06, "loss": 1.3408, "step": 221 },
    { "epoch": 0.20018034265103696, "grad_norm": 1.3022805869624663, "learning_rate": 9.264154884769811e-06, "loss": 1.3236, "step": 222 },
    { "epoch": 0.20108205590622183, "grad_norm": 1.387897289165249, "learning_rate": 9.256506458615378e-06, "loss": 1.3469, "step": 223 },
    { "epoch": 0.20198376916140667, "grad_norm": 1.4397245147743074, "learning_rate": 9.248821679833596e-06, "loss": 1.3522, "step": 224 },
    { "epoch": 0.20288548241659152, "grad_norm": 1.3137706884917066, "learning_rate": 9.241100614056202e-06, "loss": 1.3244, "step": 225 },
    { "epoch": 0.2037871956717764, "grad_norm": 1.3663543578550792, "learning_rate": 9.233343327224836e-06, "loss": 1.3152, "step": 226 },
    { "epoch": 0.20468890892696123, "grad_norm": 1.349090231463568, "learning_rate": 9.225549885590487e-06, "loss": 1.3465, "step": 227 },
    { "epoch": 0.20559062218214608, "grad_norm": 1.4177971106430631, "learning_rate": 9.217720355712924e-06, "loss": 1.3592, "step": 228 },
    { "epoch": 0.20649233543733092, "grad_norm": 1.4430064774802602, "learning_rate": 9.209854804460121e-06, "loss": 1.3283, "step": 229 },
    { "epoch": 0.2073940486925158, "grad_norm": 1.380627101897418, "learning_rate": 9.2019532990077e-06, "loss": 1.3315, "step": 230 },
    { "epoch": 0.20829576194770064, "grad_norm": 1.3293715152695407, "learning_rate": 9.194015906838345e-06, "loss": 1.3191, "step": 231 },
    { "epoch": 0.20919747520288548, "grad_norm": 1.408506235413438, "learning_rate": 9.186042695741228e-06, "loss": 1.3445, "step": 232 },
    { "epoch": 0.21009918845807032, "grad_norm": 1.4125157387882301, "learning_rate": 9.17803373381144e-06, "loss": 1.3189, "step": 233 },
    { "epoch": 0.2110009017132552, "grad_norm": 1.3525250458202043, "learning_rate": 9.16998908944939e-06, "loss": 1.3423, "step": 234 },
    { "epoch": 0.21190261496844004, "grad_norm": 1.3865242623211698, "learning_rate": 9.161908831360242e-06, "loss": 1.3126, "step": 235 },
    { "epoch": 0.21280432822362488, "grad_norm": 1.377272880444935, "learning_rate": 9.153793028553314e-06, "loss": 1.3309, "step": 236 },
    { "epoch": 0.21370604147880973, "grad_norm": 1.341817175736238, "learning_rate": 9.145641750341495e-06, "loss": 1.3212, "step": 237 },
    { "epoch": 0.2146077547339946, "grad_norm": 1.3248285979696608, "learning_rate": 9.137455066340647e-06, "loss": 1.3317, "step": 238 },
    { "epoch": 0.21550946798917944, "grad_norm": 1.395068585478875, "learning_rate": 9.129233046469021e-06, "loss": 1.337, "step": 239 },
    { "epoch": 0.2164111812443643, "grad_norm": 1.326627012011638, "learning_rate": 9.120975760946649e-06, "loss": 1.3243, "step": 240 },
    { "epoch": 0.21731289449954913, "grad_norm": 1.3411117908902908, "learning_rate": 9.11268328029475e-06, "loss": 1.3478, "step": 241 },
    { "epoch": 0.218214607754734, "grad_norm": 1.354756713038773, "learning_rate": 9.104355675335124e-06, "loss": 1.3342, "step": 242 },
    { "epoch": 0.21911632100991885, "grad_norm": 1.4335828362826124, "learning_rate": 9.095993017189554e-06, "loss": 1.3222, "step": 243 },
    { "epoch": 0.2200180342651037, "grad_norm": 1.368829614316604, "learning_rate": 9.087595377279192e-06, "loss": 1.3337, "step": 244 },
    { "epoch": 0.22091974752028856, "grad_norm": 1.345046468626478, "learning_rate": 9.079162827323951e-06, "loss": 1.3293, "step": 245 },
    { "epoch": 0.2218214607754734, "grad_norm": 1.4050717986225727, "learning_rate": 9.070695439341894e-06, "loss": 1.319, "step": 246 },
    { "epoch": 0.22272317403065825, "grad_norm": 1.334690550660624, "learning_rate": 9.062193285648616e-06, "loss": 1.3142, "step": 247 },
    { "epoch": 0.2236248872858431, "grad_norm": 1.336435641504262, "learning_rate": 9.053656438856629e-06, "loss": 1.3453, "step": 248 },
    { "epoch": 0.22452660054102797, "grad_norm": 1.4419914394186921, "learning_rate": 9.045084971874738e-06, "loss": 1.3324, "step": 249 },
    { "epoch": 0.2254283137962128, "grad_norm": 1.333464805154606, "learning_rate": 9.036478957907426e-06, "loss": 1.3299, "step": 250 },
    { "epoch": 0.22633002705139765, "grad_norm": 1.4001946831119945, "learning_rate": 9.027838470454222e-06, "loss": 1.3152, "step": 251 },
    { "epoch": 0.2272317403065825, "grad_norm": 1.3082432976301495, "learning_rate": 9.019163583309077e-06, "loss": 1.3188, "step": 252 },
    { "epoch": 0.22813345356176737, "grad_norm": 1.3085213764833319, "learning_rate": 9.010454370559723e-06, "loss": 1.3324, "step": 253 },
    { "epoch": 0.2290351668169522, "grad_norm": 1.4543733572461475, "learning_rate": 9.001710906587064e-06, "loss": 1.3465, "step": 254 },
    { "epoch": 0.22993688007213706, "grad_norm": 1.41721500158758, "learning_rate": 8.992933266064514e-06, "loss": 1.3262, "step": 255 },
    { "epoch": 0.2308385933273219, "grad_norm": 1.2985535487758335, "learning_rate": 8.984121523957376e-06, "loss": 1.3244, "step": 256 },
    { "epoch": 0.23174030658250677, "grad_norm": 1.3463578437506631, "learning_rate": 8.9752757555222e-06, "loss": 1.3237, "step": 257 },
    { "epoch": 0.23264201983769162, "grad_norm": 1.3789337432141782, "learning_rate": 8.96639603630613e-06, "loss": 1.3008, "step": 258 },
    { "epoch": 0.23354373309287646, "grad_norm": 1.305138757492257, "learning_rate": 8.957482442146271e-06, "loss": 1.3433, "step": 259 },
    { "epoch": 0.2344454463480613, "grad_norm": 1.3237926877005564, "learning_rate": 8.948535049169038e-06, "loss": 1.3605, "step": 260 },
    { "epoch": 0.23534715960324618, "grad_norm": 1.3324147166992832, "learning_rate": 8.939553933789499e-06, "loss": 1.3266, "step": 261 },
    { "epoch": 0.23624887285843102, "grad_norm": 1.3695811772880973, "learning_rate": 8.93053917271073e-06, "loss": 1.3074, "step": 262 },
    { "epoch": 0.23715058611361586, "grad_norm": 1.3544201012293267, "learning_rate": 8.921490842923164e-06, "loss": 1.3187, "step": 263 },
    { "epoch": 0.23805229936880073, "grad_norm": 1.3484671838622388, "learning_rate": 8.912409021703914e-06, "loss": 1.3293, "step": 264 },
    { "epoch": 0.23895401262398558, "grad_norm": 1.4310584062261378, "learning_rate": 8.903293786616136e-06, "loss": 1.3142, "step": 265 },
    { "epoch": 0.23985572587917042, "grad_norm": 1.3744205816678494, "learning_rate": 8.894145215508355e-06, "loss": 1.3398, "step": 266 },
    { "epoch": 0.24075743913435527, "grad_norm": 1.3790009084369972, "learning_rate": 8.884963386513798e-06, "loss": 1.3037, "step": 267 },
    { "epoch": 0.24165915238954014, "grad_norm": 1.450743624351617, "learning_rate": 8.875748378049734e-06, "loss": 1.3258, "step": 268 },
    { "epoch": 0.24256086564472498, "grad_norm": 1.374563750527912, "learning_rate": 8.866500268816803e-06, "loss": 1.2894, "step": 269 },
    { "epoch": 0.24346257889990983, "grad_norm": 1.3361505315525928, "learning_rate": 8.857219137798331e-06, "loss": 1.3078, "step": 270 },
    { "epoch": 0.24436429215509467, "grad_norm": 1.4356181066392604, "learning_rate": 8.847905064259683e-06, "loss": 1.3074, "step": 271 },
    { "epoch": 0.24526600541027954, "grad_norm": 1.429244400428148, "learning_rate": 8.838558127747551e-06, "loss": 1.3456, "step": 272 },
    { "epoch": 0.24616771866546439, "grad_norm": 1.315895913876898, "learning_rate": 8.829178408089305e-06, "loss": 1.3021, "step": 273 },
    { "epoch": 0.24706943192064923, "grad_norm": 1.3578720410840832, "learning_rate": 8.819765985392297e-06, "loss": 1.3145, "step": 274 },
    { "epoch": 0.24797114517583407, "grad_norm": 1.3227358335583927, "learning_rate": 8.810320940043173e-06, "loss": 1.2991, "step": 275 },
    { "epoch": 0.24887285843101895, "grad_norm": 1.3473711974386464, "learning_rate": 8.800843352707197e-06, "loss": 1.3305, "step": 276 },
    { "epoch": 0.2497745716862038, "grad_norm": 1.3837401955745958, "learning_rate": 8.79133330432756e-06, "loss": 1.3239, "step": 277 },
    { "epoch": 0.25067628494138866, "grad_norm": 1.3473227503086935, "learning_rate": 8.781790876124679e-06, "loss": 1.3422, "step": 278 },
    { "epoch": 0.2515779981965735, "grad_norm": 1.3098795045608111, "learning_rate": 8.772216149595515e-06, "loss": 1.3196, "step": 279 },
    { "epoch": 0.25247971145175835, "grad_norm": 1.3488357463698006, "learning_rate": 8.762609206512871e-06, "loss": 1.3021, "step": 280 },
    { "epoch": 0.2533814247069432, "grad_norm": 1.3906118010589408, "learning_rate": 8.752970128924696e-06, "loss": 1.2946, "step": 281 },
    { "epoch": 0.25428313796212804, "grad_norm": 1.314415592417016, "learning_rate": 8.743298999153382e-06, "loss": 1.2997, "step": 282 },
    { "epoch": 0.2551848512173129, "grad_norm": 1.3428549757574573, "learning_rate": 8.733595899795065e-06, "loss": 1.3446, "step": 283 },
    { "epoch": 0.2560865644724977, "grad_norm": 1.4193472151599897, "learning_rate": 8.72386091371891e-06, "loss": 1.3319, "step": 284 },
    { "epoch": 0.2569882777276826, "grad_norm": 1.407796205568918, "learning_rate": 8.714094124066417e-06, "loss": 1.3153, "step": 285 },
    { "epoch": 0.25788999098286747, "grad_norm": 1.368919703126466, "learning_rate": 8.704295614250702e-06, "loss": 1.3227, "step": 286 },
    { "epoch": 0.2587917042380523, "grad_norm": 1.4588445043175615, "learning_rate": 8.694465467955787e-06, "loss": 1.3217, "step": 287 },
    { "epoch": 0.25969341749323716, "grad_norm": 1.375947403721078, "learning_rate": 8.68460376913588e-06, "loss": 1.3237, "step": 288 },
    { "epoch": 0.260595130748422, "grad_norm": 1.4003625630261938, "learning_rate": 8.674710602014672e-06, "loss": 1.3122, "step": 289 },
    { "epoch": 0.26149684400360684, "grad_norm": 1.3382545085519817, "learning_rate": 8.664786051084597e-06, "loss": 1.3101, "step": 290 },
    { "epoch": 0.2623985572587917, "grad_norm": 1.4028482237388922, "learning_rate": 8.654830201106133e-06, "loss": 1.3257, "step": 291 },
    { "epoch": 0.26330027051397653, "grad_norm": 1.3153731433952243, "learning_rate": 8.644843137107058e-06, "loss": 1.3028, "step": 292 },
    { "epoch": 0.26420198376916143, "grad_norm": 1.3764170057217833, "learning_rate": 8.634824944381742e-06, "loss": 1.3147, "step": 293 },
    { "epoch": 0.2651036970243463, "grad_norm": 1.359807928213873, "learning_rate": 8.624775708490403e-06, "loss": 1.2961, "step": 294 },
    { "epoch": 0.2660054102795311, "grad_norm": 1.3720659056268978, "learning_rate": 8.61469551525838e-06, "loss": 1.2905, "step": 295 },
    { "epoch": 0.26690712353471596, "grad_norm": 1.3558845329560982, "learning_rate": 8.604584450775414e-06, "loss": 1.3164, "step": 296 },
    { "epoch": 0.2678088367899008, "grad_norm": 1.369224365175921, "learning_rate": 8.594442601394889e-06, "loss": 1.3027, "step": 297 },
    { "epoch": 0.26871055004508565, "grad_norm": 1.3395926586651208, "learning_rate": 8.584270053733112e-06, "loss": 1.2874, "step": 298 },
    { "epoch": 0.2696122633002705, "grad_norm": 1.3410001780528837, "learning_rate": 8.574066894668573e-06, "loss": 1.3137, "step": 299 },
    { "epoch": 0.27051397655545534, "grad_norm": 1.354326879069816, "learning_rate": 8.56383321134119e-06, "loss": 1.3243, "step": 300 },
    { "epoch": 0.27141568981064024, "grad_norm": 1.3373692508440478, "learning_rate": 8.553569091151576e-06, "loss": 1.3162, "step": 301 },
    { "epoch": 0.2723174030658251, "grad_norm": 1.308680304455333, "learning_rate": 8.543274621760294e-06, "loss": 1.3215, "step": 302 },
    { "epoch": 0.2732191163210099, "grad_norm": 1.3423833776970107, "learning_rate": 8.532949891087095e-06, "loss": 1.3025, "step": 303 },
    { "epoch": 0.27412082957619477, "grad_norm": 1.3888393861713075, "learning_rate": 8.522594987310184e-06, "loss": 1.3124, "step": 304 },
    { "epoch": 0.2750225428313796, "grad_norm": 1.3464218824388667, "learning_rate": 8.512209998865457e-06, "loss": 1.292, "step": 305 },
    { "epoch": 0.27592425608656446, "grad_norm": 1.3555408304024268, "learning_rate": 8.501795014445746e-06, "loss": 1.3027, "step": 306 },
    { "epoch": 0.2768259693417493, "grad_norm": 1.3546921352993, "learning_rate": 8.491350123000061e-06, "loss": 1.3414, "step": 307 },
    { "epoch": 0.2777276825969342, "grad_norm": 1.3141979508125459, "learning_rate": 8.48087541373284e-06, "loss": 1.2961, "step": 308 },
    { "epoch": 0.27862939585211904, "grad_norm": 1.2874735002984588, "learning_rate": 8.470370976103171e-06, "loss": 1.3051, "step": 309 },
    { "epoch": 0.2795311091073039, "grad_norm": 1.3614748416247762, "learning_rate": 8.45983689982404e-06, "loss": 1.2838, "step": 310 },
    { "epoch": 0.28043282236248873, "grad_norm": 1.3225207951086084, "learning_rate": 8.449273274861566e-06, "loss": 1.3064, "step": 311 },
    { "epoch": 0.2813345356176736, "grad_norm": 1.2914181710971653, "learning_rate": 8.438680191434221e-06, "loss": 1.293, "step": 312 },
    { "epoch": 0.2822362488728584, "grad_norm": 1.3066035639024423, "learning_rate": 8.428057740012073e-06, "loss": 1.2807, "step": 313 },
    { "epoch": 0.28313796212804326, "grad_norm": 1.3473408996502214, "learning_rate": 8.417406011316e-06, "loss": 1.3109, "step": 314 },
    { "epoch": 0.2840396753832281, "grad_norm": 1.3630749762023038, "learning_rate": 8.406725096316923e-06, "loss": 1.2907, "step": 315 },
    { "epoch": 0.284941388638413, "grad_norm": 1.3055465275152396, "learning_rate": 8.396015086235037e-06, "loss": 1.2946, "step": 316 },
    { "epoch": 0.28584310189359785, "grad_norm": 1.3941479121765923, "learning_rate": 8.385276072539014e-06, "loss": 1.3111, "step": 317 },
    { "epoch": 0.2867448151487827, "grad_norm": 1.3493271368855428, "learning_rate": 8.374508146945235e-06, "loss": 1.3202, "step": 318 },
    { "epoch": 0.28764652840396754, "grad_norm": 1.2962490184385833, "learning_rate": 8.363711401417e-06, "loss": 1.3176, "step": 319 },
    { "epoch": 0.2885482416591524, "grad_norm": 1.4044951956948102, "learning_rate": 8.352885928163748e-06, "loss": 1.3084, "step": 320 },
    { "epoch": 0.2894499549143372, "grad_norm": 1.387641068650636, "learning_rate": 8.342031819640263e-06, "loss": 1.2983, "step": 321 },
    { "epoch": 0.29035166816952207, "grad_norm": 1.2797748641028517, "learning_rate": 8.331149168545892e-06, "loss": 1.2838, "step": 322 },
    { "epoch": 0.29125338142470697, "grad_norm": 1.3045921019734228, "learning_rate": 8.320238067823749e-06, "loss": 1.292, "step": 323 },
    { "epoch": 0.2921550946798918, "grad_norm": 1.3694421776920578, "learning_rate": 8.309298610659917e-06, "loss": 1.3046, "step": 324 },
    { "epoch": 0.29305680793507666, "grad_norm": 1.348666455986645, "learning_rate": 8.298330890482661e-06, "loss": 1.2992, "step": 325 },
    { "epoch": 0.2939585211902615, "grad_norm": 1.343752165915506, "learning_rate": 8.28733500096163e-06, "loss": 1.3167, "step": 326 },
    { "epoch": 0.29486023444544635, "grad_norm": 1.3977838323584155, "learning_rate": 8.276311036007041e-06, "loss": 1.2958, "step": 327 },
    { "epoch": 0.2957619477006312, "grad_norm": 1.3705652789984946, "learning_rate": 8.2652590897689e-06, "loss": 1.3303, "step": 328 },
    { "epoch": 0.29666366095581603, "grad_norm": 1.3378543071202886, "learning_rate": 8.25417925663618e-06, "loss": 1.3004, "step": 329 },
    { "epoch": 0.2975653742110009, "grad_norm": 1.3746197234875142, "learning_rate": 8.243071631236023e-06, "loss": 1.2947, "step": 330 },
    { "epoch": 0.2984670874661858, "grad_norm": 1.3322172718318712, "learning_rate": 8.231936308432935e-06, "loss": 1.3004, "step": 331 },
    { "epoch": 0.2993688007213706, "grad_norm": 1.3224285481826337, "learning_rate": 8.220773383327964e-06, "loss": 1.3201, "step": 332 },
    { "epoch": 0.30027051397655546, "grad_norm": 1.3659756588727383, "learning_rate": 8.209582951257901e-06, "loss": 1.293, "step": 333 },
    { "epoch": 0.3011722272317403, "grad_norm": 1.3750276505406167, "learning_rate": 8.198365107794457e-06, "loss": 1.2945, "step": 334 },
    { "epoch": 0.30207394048692515, "grad_norm": 1.3626396570368906, "learning_rate": 8.18711994874345e-06, "loss": 1.2826, "step": 335 },
    { "epoch": 0.30297565374211, "grad_norm": 1.349389254932144, "learning_rate": 8.175847570143985e-06, "loss": 1.3043, "step": 336 },
    { "epoch": 0.30387736699729484, "grad_norm": 1.4016282244858203, "learning_rate": 8.164548068267638e-06, "loss": 1.3022, "step": 337 },
    { "epoch": 0.3047790802524797, "grad_norm": 1.3460811412408489, "learning_rate": 8.153221539617627e-06, "loss": 1.3046, "step": 338 },
    { "epoch": 0.3056807935076646, "grad_norm": 1.299653550889351, "learning_rate": 8.141868080927998e-06, "loss": 1.2623, "step": 339 },
    { "epoch": 0.3065825067628494, "grad_norm": 1.3804264420920427, "learning_rate": 8.130487789162784e-06, "loss": 1.2922, "step": 340 },
    { "epoch": 0.30748422001803427, "grad_norm": 1.3738872809619862, "learning_rate": 8.119080761515197e-06, "loss": 1.3044, "step": 341 },
    { "epoch": 0.3083859332732191, "grad_norm": 1.3211682882089721, "learning_rate": 8.107647095406773e-06, "loss": 1.2938, "step": 342 },
    { "epoch": 0.30928764652840396, "grad_norm": 1.3569560470529722, "learning_rate": 8.09618688848656e-06, "loss": 1.2996, "step": 343 },
    { "epoch": 0.3101893597835888, "grad_norm": 1.3729719633915038, "learning_rate": 8.084700238630283e-06, "loss": 1.3086, "step": 344 },
    { "epoch": 0.31109107303877365, "grad_norm": 1.406994450093395, "learning_rate": 8.073187243939494e-06, "loss": 1.3043, "step": 345 },
    { "epoch": 0.31199278629395855, "grad_norm": 1.3654434645311497, "learning_rate": 8.061648002740743e-06, "loss": 1.3023, "step": 346 },
    { "epoch": 0.3128944995491434, "grad_norm": 1.3240616315684701, "learning_rate": 8.050082613584745e-06, "loss": 1.2766, "step": 347 },
    { "epoch": 0.31379621280432823, "grad_norm": 1.407262370116193, "learning_rate": 8.038491175245523e-06, "loss": 1.3004, "step": 348 },
    { "epoch": 0.3146979260595131, "grad_norm": 1.372462705333482, "learning_rate": 8.026873786719574e-06, "loss": 1.2837, "step": 349 },
    { "epoch": 0.3155996393146979, "grad_norm": 1.274181826236207, "learning_rate": 8.01523054722503e-06, "loss": 1.2945, "step": 350 },
    { "epoch": 0.31650135256988277, "grad_norm": 1.4141977100447898, "learning_rate": 8.003561556200796e-06, "loss": 1.2876, "step": 351 },
    { "epoch": 0.3174030658250676, "grad_norm": 1.3649136441566765, "learning_rate": 7.991866913305705e-06, "loss": 1.3149, "step": 352 },
    { "epoch": 0.31830477908025245, "grad_norm": 1.345609642702963, "learning_rate": 7.980146718417677e-06, "loss": 1.2899, "step": 353 },
    { "epoch": 0.31920649233543735, "grad_norm": 1.3482430455703702, "learning_rate": 7.968401071632854e-06, "loss": 1.2998, "step": 354 },
    { "epoch": 0.3201082055906222, "grad_norm": 1.4862722731895457, "learning_rate": 7.956630073264746e-06, "loss": 1.287, "step": 355 },
    { "epoch": 0.32100991884580704, "grad_norm": 1.3099568378155075, "learning_rate": 7.94483382384339e-06, "loss": 1.2857, "step": 356 },
    { "epoch": 0.3219116321009919, "grad_norm": 1.389663476713791, "learning_rate": 7.933012424114463e-06, "loss": 1.2643, "step": 357 },
    { "epoch": 0.32281334535617673, "grad_norm": 1.358115897466939, "learning_rate": 7.92116597503845e-06, "loss": 1.2963, "step": 358 },
    { "epoch": 0.3237150586113616, "grad_norm": 1.407271812376636, "learning_rate": 7.909294577789765e-06, "loss": 1.3218, "step": 359 },
    { "epoch": 0.3246167718665464, "grad_norm": 1.3215185181274458, "learning_rate": 7.897398333755892e-06, "loss": 1.2808, "step": 360 },
    { "epoch": 0.3255184851217313, "grad_norm": 1.3448206583595448, "learning_rate": 7.885477344536516e-06, "loss": 1.262, "step": 361 },
    { "epoch": 0.32642019837691616, "grad_norm": 1.3369298028154637, "learning_rate": 7.873531711942664e-06, "loss": 1.2948, "step": 362 },
    { "epoch": 0.327321911632101, "grad_norm": 1.3476691147339084, "learning_rate": 7.861561537995825e-06, "loss": 1.2867, "step": 363 },
    { "epoch": 0.32822362488728585, "grad_norm": 1.277983015349736, "learning_rate": 7.849566924927082e-06, "loss": 1.2919, "step": 364 },
    { "epoch": 0.3291253381424707, "grad_norm": 1.3747711906676852, "learning_rate": 7.837547975176243e-06, "loss": 1.2826, "step": 365 },
    { "epoch": 0.33002705139765554, "grad_norm": 1.4226836538925995, "learning_rate": 7.825504791390962e-06, "loss": 1.2753, "step": 366 },
    { "epoch": 0.3309287646528404, "grad_norm": 1.3591430506296809, "learning_rate": 7.813437476425863e-06, "loss": 1.315, "step": 367 },
    { "epoch": 0.3318304779080252, "grad_norm": 1.3810536824360335, "learning_rate": 7.801346133341663e-06, "loss": 1.2983, "step": 368 },
    { "epoch": 0.3327321911632101, "grad_norm": 1.3918849098123023, "learning_rate": 7.789230865404287e-06, "loss": 1.2789, "step": 369 },
    { "epoch": 0.33363390441839497, "grad_norm": 1.3944386013086512, "learning_rate": 7.777091776083996e-06,
|
"loss": 1.3068, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3345356176735798, |
|
"grad_norm": 1.3315482446866465, |
|
"learning_rate": 7.764928969054493e-06, |
|
"loss": 1.3001, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.33543733092876465, |
|
"grad_norm": 1.334078947941813, |
|
"learning_rate": 7.752742548192042e-06, |
|
"loss": 1.2957, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.3363390441839495, |
|
"grad_norm": 1.349358556672528, |
|
"learning_rate": 7.74053261757458e-06, |
|
"loss": 1.281, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.33724075743913434, |
|
"grad_norm": 1.3764708203915843, |
|
"learning_rate": 7.728299281480833e-06, |
|
"loss": 1.2959, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.3381424706943192, |
|
"grad_norm": 1.3835774406343864, |
|
"learning_rate": 7.716042644389417e-06, |
|
"loss": 1.2834, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3390441839495041, |
|
"grad_norm": 1.407266558876184, |
|
"learning_rate": 7.70376281097795e-06, |
|
"loss": 1.2942, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.33994589720468893, |
|
"grad_norm": 1.3515850606540596, |
|
"learning_rate": 7.69145988612216e-06, |
|
"loss": 1.2577, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.3408476104598738, |
|
"grad_norm": 1.307205353895994, |
|
"learning_rate": 7.679133974894984e-06, |
|
"loss": 1.2955, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.3417493237150586, |
|
"grad_norm": 1.4701520498603482, |
|
"learning_rate": 7.666785182565676e-06, |
|
"loss": 1.2532, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.34265103697024346, |
|
"grad_norm": 1.3260869562172477, |
|
"learning_rate": 7.654413614598905e-06, |
|
"loss": 1.3014, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3435527502254283, |
|
"grad_norm": 1.3383055059934015, |
|
"learning_rate": 7.642019376653858e-06, |
|
"loss": 1.2616, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.34445446348061315, |
|
"grad_norm": 1.3342827241300619, |
|
"learning_rate": 7.62960257458333e-06, |
|
"loss": 1.2798, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.345356176735798, |
|
"grad_norm": 1.3650978733267973, |
|
"learning_rate": 7.617163314432825e-06, |
|
"loss": 1.2619, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.3462578899909829, |
|
"grad_norm": 1.2878440106478128, |
|
"learning_rate": 7.604701702439652e-06, |
|
"loss": 1.2949, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.34715960324616774, |
|
"grad_norm": 1.3114645587549885, |
|
"learning_rate": 7.592217845032016e-06, |
|
"loss": 1.2857, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3480613165013526, |
|
"grad_norm": 1.312097101465185, |
|
"learning_rate": 7.579711848828106e-06, |
|
"loss": 1.2875, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.3489630297565374, |
|
"grad_norm": 1.351670846135159, |
|
"learning_rate": 7.567183820635189e-06, |
|
"loss": 1.2838, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.34986474301172227, |
|
"grad_norm": 1.3153701472924362, |
|
"learning_rate": 7.554633867448695e-06, |
|
"loss": 1.2935, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.3507664562669071, |
|
"grad_norm": 1.3124645024087132, |
|
"learning_rate": 7.542062096451306e-06, |
|
"loss": 1.2747, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.35166816952209196, |
|
"grad_norm": 1.2839138356985629, |
|
"learning_rate": 7.5294686150120345e-06, |
|
"loss": 1.2661, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3525698827772768, |
|
"grad_norm": 1.3058425890142953, |
|
"learning_rate": 7.5168535306853155e-06, |
|
"loss": 1.2878, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.3534715960324617, |
|
"grad_norm": 1.3249207369867737, |
|
"learning_rate": 7.50421695121008e-06, |
|
"loss": 1.2868, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.35437330928764654, |
|
"grad_norm": 1.2942765461903978, |
|
"learning_rate": 7.491558984508838e-06, |
|
"loss": 1.2862, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.3552750225428314, |
|
"grad_norm": 1.3224112637420926, |
|
"learning_rate": 7.4788797386867596e-06, |
|
"loss": 1.2769, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.35617673579801623, |
|
"grad_norm": 1.3206566542639389, |
|
"learning_rate": 7.466179322030746e-06, |
|
"loss": 1.2846, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3570784490532011, |
|
"grad_norm": 1.3631450867826957, |
|
"learning_rate": 7.453457843008509e-06, |
|
"loss": 1.284, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.3579801623083859, |
|
"grad_norm": 1.3218571416387632, |
|
"learning_rate": 7.4407154102676425e-06, |
|
"loss": 1.3038, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.35888187556357076, |
|
"grad_norm": 1.317177282255559, |
|
"learning_rate": 7.427952132634694e-06, |
|
"loss": 1.2509, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.35978358881875566, |
|
"grad_norm": 1.3276673394491625, |
|
"learning_rate": 7.41516811911424e-06, |
|
"loss": 1.2644, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.3606853020739405, |
|
"grad_norm": 1.280809217458966, |
|
"learning_rate": 7.402363478887948e-06, |
|
"loss": 1.285, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36158701532912535, |
|
"grad_norm": 1.3571731498903, |
|
"learning_rate": 7.389538321313652e-06, |
|
"loss": 1.2977, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.3624887285843102, |
|
"grad_norm": 1.4009686853014174, |
|
"learning_rate": 7.376692755924407e-06, |
|
"loss": 1.2784, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.36339044183949504, |
|
"grad_norm": 1.2677194762164836, |
|
"learning_rate": 7.363826892427568e-06, |
|
"loss": 1.2985, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.3642921550946799, |
|
"grad_norm": 1.3137009718811887, |
|
"learning_rate": 7.350940840703842e-06, |
|
"loss": 1.2726, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.3651938683498647, |
|
"grad_norm": 1.2806871619916333, |
|
"learning_rate": 7.338034710806353e-06, |
|
"loss": 1.2854, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.36609558160504957, |
|
"grad_norm": 1.34164695933686, |
|
"learning_rate": 7.3251086129597034e-06, |
|
"loss": 1.2927, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.36699729486023447, |
|
"grad_norm": 1.3014689973098728, |
|
"learning_rate": 7.312162657559031e-06, |
|
"loss": 1.2824, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.3678990081154193, |
|
"grad_norm": 1.2963420961664436, |
|
"learning_rate": 7.299196955169068e-06, |
|
"loss": 1.2833, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.36880072137060416, |
|
"grad_norm": 1.2885380885948925, |
|
"learning_rate": 7.286211616523193e-06, |
|
"loss": 1.2802, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.369702434625789, |
|
"grad_norm": 1.2629464462465954, |
|
"learning_rate": 7.2732067525224914e-06, |
|
"loss": 1.2885, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.37060414788097384, |
|
"grad_norm": 1.2729298983223787, |
|
"learning_rate": 7.2601824742347985e-06, |
|
"loss": 1.2759, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.3715058611361587, |
|
"grad_norm": 1.3560121385795936, |
|
"learning_rate": 7.247138892893765e-06, |
|
"loss": 1.2683, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.37240757439134353, |
|
"grad_norm": 1.3408137997088863, |
|
"learning_rate": 7.2340761198978916e-06, |
|
"loss": 1.2827, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.37330928764652843, |
|
"grad_norm": 1.3745114451521934, |
|
"learning_rate": 7.220994266809591e-06, |
|
"loss": 1.2957, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.3742110009017133, |
|
"grad_norm": 1.346575107900885, |
|
"learning_rate": 7.207893445354224e-06, |
|
"loss": 1.2978, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3751127141568981, |
|
"grad_norm": 1.2830969629139972, |
|
"learning_rate": 7.1947737674191555e-06, |
|
"loss": 1.2925, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.37601442741208296, |
|
"grad_norm": 1.3694758238273899, |
|
"learning_rate": 7.1816353450527886e-06, |
|
"loss": 1.2821, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.3769161406672678, |
|
"grad_norm": 1.3231983523784938, |
|
"learning_rate": 7.1684782904636174e-06, |
|
"loss": 1.2968, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.37781785392245265, |
|
"grad_norm": 1.2669291717660884, |
|
"learning_rate": 7.155302716019263e-06, |
|
"loss": 1.2601, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.3787195671776375, |
|
"grad_norm": 1.3454544044505505, |
|
"learning_rate": 7.142108734245512e-06, |
|
"loss": 1.3008, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.37962128043282234, |
|
"grad_norm": 1.3216303173172852, |
|
"learning_rate": 7.128896457825364e-06, |
|
"loss": 1.2821, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.38052299368800724, |
|
"grad_norm": 1.3829956233217842, |
|
"learning_rate": 7.115665999598058e-06, |
|
"loss": 1.2677, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.3814247069431921, |
|
"grad_norm": 1.312479941373894, |
|
"learning_rate": 7.10241747255812e-06, |
|
"loss": 1.2753, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.3823264201983769, |
|
"grad_norm": 1.3644599578334198, |
|
"learning_rate": 7.089150989854385e-06, |
|
"loss": 1.2736, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.38322813345356177, |
|
"grad_norm": 1.3270302655112538, |
|
"learning_rate": 7.075866664789047e-06, |
|
"loss": 1.2996, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3841298467087466, |
|
"grad_norm": 1.3217555259246643, |
|
"learning_rate": 7.062564610816678e-06, |
|
"loss": 1.2545, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.38503155996393146, |
|
"grad_norm": 1.3491461800964386, |
|
"learning_rate": 7.049244941543259e-06, |
|
"loss": 1.291, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.3859332732191163, |
|
"grad_norm": 1.3556856304743925, |
|
"learning_rate": 7.0359077707252235e-06, |
|
"loss": 1.2747, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.38683498647430115, |
|
"grad_norm": 1.3561707578414417, |
|
"learning_rate": 7.022553212268469e-06, |
|
"loss": 1.2791, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.38773669972948605, |
|
"grad_norm": 1.3184506441485386, |
|
"learning_rate": 7.0091813802273965e-06, |
|
"loss": 1.2883, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3886384129846709, |
|
"grad_norm": 1.263280337390235, |
|
"learning_rate": 6.995792388803929e-06, |
|
"loss": 1.2777, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.38954012623985573, |
|
"grad_norm": 1.297689514662243, |
|
"learning_rate": 6.9823863523465405e-06, |
|
"loss": 1.2461, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3904418394950406, |
|
"grad_norm": 1.342033341696052, |
|
"learning_rate": 6.968963385349277e-06, |
|
"loss": 1.2509, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3913435527502254, |
|
"grad_norm": 1.360711918633311, |
|
"learning_rate": 6.95552360245078e-06, |
|
"loss": 1.2967, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.39224526600541026, |
|
"grad_norm": 1.3324380530143383, |
|
"learning_rate": 6.942067118433308e-06, |
|
"loss": 1.2773, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3931469792605951, |
|
"grad_norm": 1.3761059794482413, |
|
"learning_rate": 6.92859404822175e-06, |
|
"loss": 1.2832, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.39404869251578, |
|
"grad_norm": 1.3702237680815197, |
|
"learning_rate": 6.9151045068826584e-06, |
|
"loss": 1.2687, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.39495040577096485, |
|
"grad_norm": 1.3487692751034914, |
|
"learning_rate": 6.9015986096232465e-06, |
|
"loss": 1.291, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.3958521190261497, |
|
"grad_norm": 1.3424423254670161, |
|
"learning_rate": 6.888076471790423e-06, |
|
"loss": 1.2621, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.39675383228133454, |
|
"grad_norm": 1.3843979031440812, |
|
"learning_rate": 6.874538208869797e-06, |
|
"loss": 1.2767, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3976555455365194, |
|
"grad_norm": 1.3069934768452458, |
|
"learning_rate": 6.860983936484689e-06, |
|
"loss": 1.2866, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3985572587917042, |
|
"grad_norm": 1.3106394157833179, |
|
"learning_rate": 6.8474137703951574e-06, |
|
"loss": 1.2749, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.39945897204688907, |
|
"grad_norm": 1.2783459290470887, |
|
"learning_rate": 6.83382782649699e-06, |
|
"loss": 1.2763, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.4003606853020739, |
|
"grad_norm": 1.300911163405327, |
|
"learning_rate": 6.820226220820733e-06, |
|
"loss": 1.2837, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.4012623985572588, |
|
"grad_norm": 1.3533201412174218, |
|
"learning_rate": 6.806609069530687e-06, |
|
"loss": 1.2334, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.40216411181244366, |
|
"grad_norm": 1.3410157731632268, |
|
"learning_rate": 6.7929764889239235e-06, |
|
"loss": 1.2695, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.4030658250676285, |
|
"grad_norm": 1.3159135620461133, |
|
"learning_rate": 6.779328595429282e-06, |
|
"loss": 1.2759, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.40396753832281335, |
|
"grad_norm": 1.3429134308900144, |
|
"learning_rate": 6.765665505606389e-06, |
|
"loss": 1.2639, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.4048692515779982, |
|
"grad_norm": 1.358085645434167, |
|
"learning_rate": 6.7519873361446475e-06, |
|
"loss": 1.2709, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.40577096483318303, |
|
"grad_norm": 1.282126956537775, |
|
"learning_rate": 6.738294203862255e-06, |
|
"loss": 1.2801, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4066726780883679, |
|
"grad_norm": 1.3820387277990962, |
|
"learning_rate": 6.724586225705191e-06, |
|
"loss": 1.2791, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.4075743913435528, |
|
"grad_norm": 1.3163223637459345, |
|
"learning_rate": 6.710863518746233e-06, |
|
"loss": 1.2556, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.4084761045987376, |
|
"grad_norm": 1.2796002323586544, |
|
"learning_rate": 6.697126200183945e-06, |
|
"loss": 1.2749, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.40937781785392247, |
|
"grad_norm": 1.3546933591445498, |
|
"learning_rate": 6.683374387341688e-06, |
|
"loss": 1.2883, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.4102795311091073, |
|
"grad_norm": 1.3487555368396058, |
|
"learning_rate": 6.669608197666599e-06, |
|
"loss": 1.2743, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.41118124436429215, |
|
"grad_norm": 1.266890989390273, |
|
"learning_rate": 6.655827748728613e-06, |
|
"loss": 1.2544, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.412082957619477, |
|
"grad_norm": 1.2531573983607907, |
|
"learning_rate": 6.642033158219436e-06, |
|
"loss": 1.2782, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.41298467087466184, |
|
"grad_norm": 1.2705610688755955, |
|
"learning_rate": 6.628224543951558e-06, |
|
"loss": 1.2573, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.4138863841298467, |
|
"grad_norm": 1.3037540862478307, |
|
"learning_rate": 6.614402023857231e-06, |
|
"loss": 1.2523, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.4147880973850316, |
|
"grad_norm": 1.315768394711074, |
|
"learning_rate": 6.600565715987477e-06, |
|
"loss": 1.3002, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.41568981064021643, |
|
"grad_norm": 1.2815374396487438, |
|
"learning_rate": 6.586715738511067e-06, |
|
"loss": 1.2452, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.4165915238954013, |
|
"grad_norm": 1.265492572389699, |
|
"learning_rate": 6.5728522097135185e-06, |
|
"loss": 1.2615, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.4174932371505861, |
|
"grad_norm": 1.3240543289156776, |
|
"learning_rate": 6.558975247996082e-06, |
|
"loss": 1.2809, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.41839495040577096, |
|
"grad_norm": 1.3155938565360743, |
|
"learning_rate": 6.545084971874738e-06, |
|
"loss": 1.2814, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.4192966636609558, |
|
"grad_norm": 1.373703900141433, |
|
"learning_rate": 6.531181499979171e-06, |
|
"loss": 1.2914, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.42019837691614065, |
|
"grad_norm": 1.240236493584311, |
|
"learning_rate": 6.517264951051768e-06, |
|
"loss": 1.2626, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.4211000901713255, |
|
"grad_norm": 1.2854276989826168, |
|
"learning_rate": 6.503335443946599e-06, |
|
"loss": 1.2403, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.4220018034265104, |
|
"grad_norm": 1.2747103544525322, |
|
"learning_rate": 6.489393097628404e-06, |
|
"loss": 1.2539, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.42290351668169524, |
|
"grad_norm": 1.2909245211989353, |
|
"learning_rate": 6.475438031171574e-06, |
|
"loss": 1.2429, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.4238052299368801, |
|
"grad_norm": 1.337002870116083, |
|
"learning_rate": 6.461470363759138e-06, |
|
"loss": 1.2849, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4247069431920649, |
|
"grad_norm": 1.2988092746817106, |
|
"learning_rate": 6.447490214681742e-06, |
|
"loss": 1.2777, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.42560865644724977, |
|
"grad_norm": 1.317724826921231, |
|
"learning_rate": 6.433497703336634e-06, |
|
"loss": 1.2512, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.4265103697024346, |
|
"grad_norm": 1.2707143136330774, |
|
"learning_rate": 6.419492949226639e-06, |
|
"loss": 1.2728, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.42741208295761945, |
|
"grad_norm": 1.3083801478910981, |
|
"learning_rate": 6.405476071959142e-06, |
|
"loss": 1.292, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.42831379621280435, |
|
"grad_norm": 1.3054874743338112, |
|
"learning_rate": 6.391447191245066e-06, |
|
"loss": 1.2517, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.4292155094679892, |
|
"grad_norm": 1.2904740870179476, |
|
"learning_rate": 6.3774064268978485e-06, |
|
"loss": 1.2707, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.43011722272317404, |
|
"grad_norm": 1.2629518785414842, |
|
"learning_rate": 6.363353898832421e-06, |
|
"loss": 1.2582, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.4310189359783589, |
|
"grad_norm": 1.3089815906738431, |
|
"learning_rate": 6.34928972706418e-06, |
|
"loss": 1.2735, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.43192064923354373, |
|
"grad_norm": 1.290175664928981, |
|
"learning_rate": 6.335214031707966e-06, |
|
"loss": 1.2844, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.4328223624887286, |
|
"grad_norm": 1.2793599157516249, |
|
"learning_rate": 6.321126932977035e-06, |
|
"loss": 1.2853, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.4337240757439134, |
|
"grad_norm": 1.3000724086825444, |
|
"learning_rate": 6.307028551182041e-06, |
|
"loss": 1.2285, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.43462578899909826, |
|
"grad_norm": 1.3433631585110632, |
|
"learning_rate": 6.292919006729988e-06, |
|
"loss": 1.2548, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.43552750225428316, |
|
"grad_norm": 1.2938230816915852, |
|
"learning_rate": 6.278798420123227e-06, |
|
"loss": 1.2848, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.436429215509468, |
|
"grad_norm": 1.3968096528040583, |
|
"learning_rate": 6.264666911958404e-06, |
|
"loss": 1.277, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.43733092876465285, |
|
"grad_norm": 1.3270469545827397, |
|
"learning_rate": 6.250524602925449e-06, |
|
"loss": 1.2472, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.4382326420198377, |
|
"grad_norm": 1.3158907122253496, |
|
"learning_rate": 6.23637161380653e-06, |
|
"loss": 1.2371, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.43913435527502254, |
|
"grad_norm": 1.2974298920685672, |
|
"learning_rate": 6.222208065475034e-06, |
|
"loss": 1.2634, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.4400360685302074, |
|
"grad_norm": 1.2961515785195792, |
|
"learning_rate": 6.208034078894523e-06, |
|
"loss": 1.2948, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.4409377817853922, |
|
"grad_norm": 1.3046294501341769, |
|
"learning_rate": 6.193849775117709e-06, |
|
"loss": 1.2559, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.4418394950405771, |
|
"grad_norm": 1.3561539228341617, |
|
"learning_rate": 6.179655275285422e-06, |
|
"loss": 1.2522, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.44274120829576197, |
|
"grad_norm": 1.3657194147132745, |
|
"learning_rate": 6.165450700625565e-06, |
|
"loss": 1.2813, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.4436429215509468, |
|
"grad_norm": 1.3150013080989733, |
|
"learning_rate": 6.151236172452086e-06, |
|
"loss": 1.2724, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.44454463480613166, |
|
"grad_norm": 1.3131202427968371, |
|
"learning_rate": 6.137011812163943e-06, |
|
"loss": 1.2533, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.4454463480613165, |
|
"grad_norm": 1.3524564852985235, |
|
"learning_rate": 6.122777741244067e-06, |
|
"loss": 1.2631, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.44634806131650134, |
|
"grad_norm": 1.3458593194377417, |
|
"learning_rate": 6.108534081258317e-06, |
|
"loss": 1.2685, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.4472497745716862, |
|
"grad_norm": 1.330923016565149, |
|
"learning_rate": 6.094280953854451e-06, |
|
"loss": 1.2568, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.44815148782687103, |
|
"grad_norm": 1.328196318920164, |
|
"learning_rate": 6.0800184807610815e-06, |
|
"loss": 1.2646, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.44905320108205593, |
|
"grad_norm": 1.3543757841751654, |
|
"learning_rate": 6.065746783786639e-06, |
|
"loss": 1.2466, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.4499549143372408, |
|
"grad_norm": 1.3642324780253887, |
|
"learning_rate": 6.051465984818332e-06, |
|
"loss": 1.2723, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.4508566275924256, |
|
"grad_norm": 1.2543782903684808, |
|
"learning_rate": 6.037176205821099e-06, |
|
"loss": 1.265, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.45175834084761046, |
|
"grad_norm": 1.3520797825716413, |
|
"learning_rate": 6.022877568836579e-06, |
|
"loss": 1.271, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.4526600541027953, |
|
"grad_norm": 1.3381165664660035, |
|
"learning_rate": 6.008570195982057e-06, |
|
"loss": 1.2842, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.45356176735798015, |
|
"grad_norm": 1.3145159045552166, |
|
"learning_rate": 5.9942542094494295e-06, |
|
"loss": 1.2608, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.454463480613165, |
|
"grad_norm": 1.3776537193003155, |
|
"learning_rate": 5.979929731504158e-06, |
|
"loss": 1.2462, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.45536519386834984, |
|
"grad_norm": 1.3582248635145542, |
|
"learning_rate": 5.9655968844842236e-06, |
|
"loss": 1.2697, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.45626690712353474, |
|
"grad_norm": 1.397601016532863, |
|
"learning_rate": 5.951255790799082e-06, |
|
"loss": 1.2568, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.4571686203787196, |
|
"grad_norm": 1.3563867417958715, |
|
"learning_rate": 5.936906572928625e-06, |
|
"loss": 1.2427, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.4580703336339044, |
|
"grad_norm": 1.3042721613566737, |
|
"learning_rate": 5.922549353422121e-06, |
|
"loss": 1.2515, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.45897204688908927, |
|
"grad_norm": 1.3588624169364447, |
|
"learning_rate": 5.908184254897183e-06, |
|
"loss": 1.2818, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.4598737601442741, |
|
"grad_norm": 1.3477204486305108, |
|
"learning_rate": 5.893811400038711e-06, |
|
"loss": 1.2512, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.46077547339945896, |
|
"grad_norm": 1.2814432877128779, |
|
"learning_rate": 5.87943091159785e-06, |
|
"loss": 1.2307, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.4616771866546438, |
|
"grad_norm": 1.3786543590269573, |
|
"learning_rate": 5.865042912390938e-06, |
|
"loss": 1.2736, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.4625788999098287, |
|
"grad_norm": 1.2913891449053854, |
|
"learning_rate": 5.850647525298457e-06, |
|
"loss": 1.2452, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.46348061316501354, |
|
"grad_norm": 1.415181008314584, |
|
"learning_rate": 5.836244873263989e-06, |
|
"loss": 1.2264, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.4643823264201984, |
|
"grad_norm": 1.356445707006065, |
|
"learning_rate": 5.8218350792931596e-06, |
|
"loss": 1.2504, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.46528403967538323, |
|
"grad_norm": 1.3302986281953149, |
|
"learning_rate": 5.807418266452591e-06, |
|
"loss": 1.2422, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.4661857529305681, |
|
"grad_norm": 1.33730329817938, |
|
"learning_rate": 5.792994557868851e-06, |
|
"loss": 1.2566, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.4670874661857529, |
|
"grad_norm": 1.3745406587403888, |
|
"learning_rate": 5.778564076727395e-06, |
|
"loss": 1.2577, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.46798917944093776, |
|
"grad_norm": 1.3877226557278701, |
|
"learning_rate": 5.764126946271526e-06, |
|
"loss": 1.2332, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.4688908926961226, |
|
"grad_norm": 1.3523749190458996, |
|
"learning_rate": 5.749683289801331e-06, |
|
"loss": 1.2735, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4697926059513075, |
|
"grad_norm": 1.2986294783132397, |
|
"learning_rate": 5.735233230672636e-06, |
|
"loss": 1.2509, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.47069431920649235, |
|
"grad_norm": 1.3292382069120443, |
|
"learning_rate": 5.720776892295944e-06, |
|
"loss": 1.2429, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.4715960324616772, |
|
"grad_norm": 1.3196463593122516, |
|
"learning_rate": 5.70631439813539e-06, |
|
"loss": 1.2614, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.47249774571686204, |
|
"grad_norm": 1.3365623305366012, |
|
"learning_rate": 5.691845871707682e-06, |
|
"loss": 1.2547, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.4733994589720469, |
|
"grad_norm": 1.4092965451878707, |
|
"learning_rate": 5.677371436581044e-06, |
|
"loss": 1.2522, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.4743011722272317, |
|
"grad_norm": 1.2958259510303567, |
|
"learning_rate": 5.662891216374165e-06, |
|
"loss": 1.2589, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.47520288548241657, |
|
"grad_norm": 1.314219195752724, |
|
"learning_rate": 5.64840533475514e-06, |
|
"loss": 1.264, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.47610459873760147, |
|
"grad_norm": 1.3183829593636753, |
|
"learning_rate": 5.633913915440419e-06, |
|
"loss": 1.2719, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.4770063119927863, |
|
"grad_norm": 1.363071460186982, |
|
"learning_rate": 5.61941708219374e-06, |
|
"loss": 1.2327, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.47790802524797116, |
|
"grad_norm": 1.328897114850557, |
|
"learning_rate": 5.604914958825085e-06, |
|
"loss": 1.2728, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.478809738503156, |
|
"grad_norm": 1.3490178940429087, |
|
"learning_rate": 5.590407669189612e-06, |
|
"loss": 1.2648, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.47971145175834085, |
|
"grad_norm": 1.3274020505027164, |
|
"learning_rate": 5.575895337186605e-06, |
|
"loss": 1.2312, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.4806131650135257, |
|
"grad_norm": 1.3042298628231705, |
|
"learning_rate": 5.561378086758406e-06, |
|
"loss": 1.2511, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.48151487826871053, |
|
"grad_norm": 1.2449161967710574, |
|
"learning_rate": 5.546856041889374e-06, |
|
"loss": 1.2528, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.4824165915238954, |
|
"grad_norm": 1.297681240745865, |
|
"learning_rate": 5.5323293266047996e-06, |
|
"loss": 1.2618, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.4833183047790803, |
|
"grad_norm": 1.25941931209134, |
|
"learning_rate": 5.5177980649698744e-06, |
|
"loss": 1.2449, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.4842200180342651, |
|
"grad_norm": 1.3103057695935634, |
|
"learning_rate": 5.503262381088613e-06, |
|
"loss": 1.2537, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.48512173128944996, |
|
"grad_norm": 1.2872216919055939, |
|
"learning_rate": 5.488722399102796e-06, |
|
"loss": 1.251, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.4860234445446348, |
|
"grad_norm": 1.335560503143788, |
|
"learning_rate": 5.4741782431909144e-06, |
|
"loss": 1.2464, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.48692515779981965, |
|
"grad_norm": 1.276852157297722, |
|
"learning_rate": 5.459630037567105e-06, |
|
"loss": 1.2418, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4878268710550045, |
|
"grad_norm": 1.3990001265601495, |
|
"learning_rate": 5.445077906480095e-06, |
|
"loss": 1.2597, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.48872858431018934, |
|
"grad_norm": 1.2988789147578377, |
|
"learning_rate": 5.430521974212132e-06, |
|
"loss": 1.271, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.4896302975653742, |
|
"grad_norm": 1.289894149801735, |
|
"learning_rate": 5.4159623650779305e-06, |
|
"loss": 1.2396, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.4905320108205591, |
|
"grad_norm": 1.3361917628103448, |
|
"learning_rate": 5.4013992034236065e-06, |
|
"loss": 1.2806, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.4914337240757439, |
|
"grad_norm": 1.3851343658094326, |
|
"learning_rate": 5.386832613625615e-06, |
|
"loss": 1.2652, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.49233543733092877, |
|
"grad_norm": 1.3460734085077293, |
|
"learning_rate": 5.3722627200896894e-06, |
|
"loss": 1.2381, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.4932371505861136, |
|
"grad_norm": 1.3361567213666667, |
|
"learning_rate": 5.357689647249782e-06, |
|
"loss": 1.2388, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.49413886384129846, |
|
"grad_norm": 1.2889281104821497, |
|
"learning_rate": 5.343113519566994e-06, |
|
"loss": 1.2488, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.4950405770964833, |
|
"grad_norm": 1.2997408839425744, |
|
"learning_rate": 5.328534461528515e-06, |
|
"loss": 1.2575, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.49594229035166815, |
|
"grad_norm": 1.3549331356810177, |
|
"learning_rate": 5.3139525976465675e-06, |
|
"loss": 1.2639, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.49684400360685305, |
|
"grad_norm": 1.3051590759911373, |
|
"learning_rate": 5.299368052457332e-06, |
|
"loss": 1.2566, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.4977457168620379, |
|
"grad_norm": 1.3452343742881867, |
|
"learning_rate": 5.284780950519892e-06, |
|
"loss": 1.2587, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.49864743011722273, |
|
"grad_norm": 1.3127844704746279, |
|
"learning_rate": 5.270191416415163e-06, |
|
"loss": 1.2499, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.4995491433724076, |
|
"grad_norm": 1.3200976279887406, |
|
"learning_rate": 5.255599574744836e-06, |
|
"loss": 1.2732, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.5004508566275925, |
|
"grad_norm": 1.337854355373663, |
|
"learning_rate": 5.241005550130308e-06, |
|
"loss": 1.2649, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5013525698827773, |
|
"grad_norm": 1.314236194849463, |
|
"learning_rate": 5.2264094672116195e-06, |
|
"loss": 1.2482, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.5022542831379622, |
|
"grad_norm": 1.2662421270865347, |
|
"learning_rate": 5.211811450646392e-06, |
|
"loss": 1.2555, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.503155996393147, |
|
"grad_norm": 1.3444708300857615, |
|
"learning_rate": 5.197211625108755e-06, |
|
"loss": 1.2855, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.5040577096483319, |
|
"grad_norm": 1.3117429792391575, |
|
"learning_rate": 5.182610115288296e-06, |
|
"loss": 1.2323, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.5049594229035167, |
|
"grad_norm": 1.3763818285742713, |
|
"learning_rate": 5.16800704588898e-06, |
|
"loss": 1.2401, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5058611361587015, |
|
"grad_norm": 1.3139874556118811, |
|
"learning_rate": 5.153402541628097e-06, |
|
"loss": 1.2701, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.5067628494138864, |
|
"grad_norm": 1.2896440294650282, |
|
"learning_rate": 5.138796727235188e-06, |
|
"loss": 1.242, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.5076645626690712, |
|
"grad_norm": 1.3289435111036993, |
|
"learning_rate": 5.124189727450985e-06, |
|
"loss": 1.2483, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.5085662759242561, |
|
"grad_norm": 1.3883596860696592, |
|
"learning_rate": 5.109581667026341e-06, |
|
"loss": 1.2503, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.5094679891794409, |
|
"grad_norm": 1.3239009545532878, |
|
"learning_rate": 5.094972670721171e-06, |
|
"loss": 1.2401, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5103697024346258, |
|
"grad_norm": 1.295778294127707, |
|
"learning_rate": 5.080362863303379e-06, |
|
"loss": 1.2423, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.5112714156898106, |
|
"grad_norm": 1.369226214795755, |
|
"learning_rate": 5.065752369547803e-06, |
|
"loss": 1.2225, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.5121731289449954, |
|
"grad_norm": 1.4045782383828402, |
|
"learning_rate": 5.051141314235135e-06, |
|
"loss": 1.255, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.5130748422001803, |
|
"grad_norm": 1.3943603790077395, |
|
"learning_rate": 5.036529822150865e-06, |
|
"loss": 1.2561, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.5139765554553652, |
|
"grad_norm": 1.3174744572295207, |
|
"learning_rate": 5.021918018084217e-06, |
|
"loss": 1.2606, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5148782687105501, |
|
"grad_norm": 1.339569973719635, |
|
"learning_rate": 5.007306026827076e-06, |
|
"loss": 1.204, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.5157799819657349, |
|
"grad_norm": 1.3320754147517606, |
|
"learning_rate": 4.992693973172925e-06, |
|
"loss": 1.2509, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.5166816952209198, |
|
"grad_norm": 1.3051524852266552, |
|
"learning_rate": 4.978081981915784e-06, |
|
"loss": 1.2567, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.5175834084761046, |
|
"grad_norm": 1.3339398715435005, |
|
"learning_rate": 4.963470177849135e-06, |
|
"loss": 1.2611, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.5184851217312895, |
|
"grad_norm": 1.3586447809755204, |
|
"learning_rate": 4.948858685764867e-06, |
|
"loss": 1.2572, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5193868349864743, |
|
"grad_norm": 1.3003847078321877, |
|
"learning_rate": 4.934247630452198e-06, |
|
"loss": 1.2395, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.5202885482416592, |
|
"grad_norm": 1.2589068276430717, |
|
"learning_rate": 4.919637136696621e-06, |
|
"loss": 1.2392, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.521190261496844, |
|
"grad_norm": 1.340867722878211, |
|
"learning_rate": 4.905027329278831e-06, |
|
"loss": 1.2476, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.5220919747520288, |
|
"grad_norm": 1.3726498349859046, |
|
"learning_rate": 4.89041833297366e-06, |
|
"loss": 1.2498, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.5229936880072137, |
|
"grad_norm": 1.3354475932049095, |
|
"learning_rate": 4.875810272549017e-06, |
|
"loss": 1.2521, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5238954012623985, |
|
"grad_norm": 1.3329517177669807, |
|
"learning_rate": 4.861203272764813e-06, |
|
"loss": 1.269, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.5247971145175834, |
|
"grad_norm": 1.3565466102588846, |
|
"learning_rate": 4.846597458371905e-06, |
|
"loss": 1.2419, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.5256988277727682, |
|
"grad_norm": 1.4078312898982641, |
|
"learning_rate": 4.831992954111022e-06, |
|
"loss": 1.2509, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.5266005410279531, |
|
"grad_norm": 1.3295601064574625, |
|
"learning_rate": 4.817389884711706e-06, |
|
"loss": 1.2644, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.527502254283138, |
|
"grad_norm": 1.321165085338158, |
|
"learning_rate": 4.802788374891246e-06, |
|
"loss": 1.2556, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5284039675383229, |
|
"grad_norm": 1.3743099444135773, |
|
"learning_rate": 4.788188549353611e-06, |
|
"loss": 1.2417, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.5293056807935077, |
|
"grad_norm": 1.332517658766984, |
|
"learning_rate": 4.773590532788382e-06, |
|
"loss": 1.2539, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.5302073940486925, |
|
"grad_norm": 1.2694313233555439, |
|
"learning_rate": 4.758994449869693e-06, |
|
"loss": 1.2736, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.5311091073038774, |
|
"grad_norm": 1.2742141092043229, |
|
"learning_rate": 4.744400425255165e-06, |
|
"loss": 1.2686, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.5320108205590622, |
|
"grad_norm": 1.3252301704980207, |
|
"learning_rate": 4.7298085835848385e-06, |
|
"loss": 1.2448, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5329125338142471, |
|
"grad_norm": 1.3205652503863317, |
|
"learning_rate": 4.71521904948011e-06, |
|
"loss": 1.2445, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.5338142470694319, |
|
"grad_norm": 1.3075901906712277, |
|
"learning_rate": 4.700631947542667e-06, |
|
"loss": 1.2344, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.5347159603246168, |
|
"grad_norm": 1.2737322650247187, |
|
"learning_rate": 4.686047402353433e-06, |
|
"loss": 1.2524, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.5356176735798016, |
|
"grad_norm": 1.2756661202797257, |
|
"learning_rate": 4.671465538471487e-06, |
|
"loss": 1.2503, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.5365193868349865, |
|
"grad_norm": 1.325658160187221, |
|
"learning_rate": 4.6568864804330095e-06, |
|
"loss": 1.2465, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.5374211000901713, |
|
"grad_norm": 1.28854606631265, |
|
"learning_rate": 4.64231035275022e-06, |
|
"loss": 1.2605, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.5383228133453561, |
|
"grad_norm": 1.35727097357451, |
|
"learning_rate": 4.627737279910311e-06, |
|
"loss": 1.2563, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.539224526600541, |
|
"grad_norm": 1.3307866660108574, |
|
"learning_rate": 4.613167386374386e-06, |
|
"loss": 1.2746, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.5401262398557258, |
|
"grad_norm": 1.2981602983236322, |
|
"learning_rate": 4.598600796576395e-06, |
|
"loss": 1.2606, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.5410279531109107, |
|
"grad_norm": 1.2860239646762985, |
|
"learning_rate": 4.58403763492207e-06, |
|
"loss": 1.2577, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5419296663660956, |
|
"grad_norm": 1.3065289252471795, |
|
"learning_rate": 4.569478025787869e-06, |
|
"loss": 1.2276, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.5428313796212805, |
|
"grad_norm": 1.2932249228962214, |
|
"learning_rate": 4.554922093519906e-06, |
|
"loss": 1.2472, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.5437330928764653, |
|
"grad_norm": 1.2565236651934977, |
|
"learning_rate": 4.5403699624328955e-06, |
|
"loss": 1.2303, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.5446348061316502, |
|
"grad_norm": 1.248067022227819, |
|
"learning_rate": 4.525821756809088e-06, |
|
"loss": 1.2453, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.545536519386835, |
|
"grad_norm": 1.3002341150666157, |
|
"learning_rate": 4.511277600897205e-06, |
|
"loss": 1.2157, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.5464382326420198, |
|
"grad_norm": 1.4187715891618866, |
|
"learning_rate": 4.496737618911388e-06, |
|
"loss": 1.2559, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.5473399458972047, |
|
"grad_norm": 1.3027783100524892, |
|
"learning_rate": 4.482201935030126e-06, |
|
"loss": 1.2335, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.5482416591523895, |
|
"grad_norm": 1.305987240699055, |
|
"learning_rate": 4.467670673395202e-06, |
|
"loss": 1.2561, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.5491433724075744, |
|
"grad_norm": 1.28216707654573, |
|
"learning_rate": 4.4531439581106295e-06, |
|
"loss": 1.2195, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.5500450856627592, |
|
"grad_norm": 1.300031525314949, |
|
"learning_rate": 4.438621913241593e-06, |
|
"loss": 1.2583, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5509467989179441, |
|
"grad_norm": 1.3321215317104576, |
|
"learning_rate": 4.424104662813396e-06, |
|
"loss": 1.2331, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.5518485121731289, |
|
"grad_norm": 1.2888029553989442, |
|
"learning_rate": 4.409592330810389e-06, |
|
"loss": 1.2238, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.5527502254283138, |
|
"grad_norm": 1.2815847068063542, |
|
"learning_rate": 4.3950850411749164e-06, |
|
"loss": 1.2204, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.5536519386834986, |
|
"grad_norm": 1.325255076780753, |
|
"learning_rate": 4.38058291780626e-06, |
|
"loss": 1.2165, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.5545536519386834, |
|
"grad_norm": 1.2735817359390165, |
|
"learning_rate": 4.366086084559582e-06, |
|
"loss": 1.2599, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.5554553651938684, |
|
"grad_norm": 1.2850880285296393, |
|
"learning_rate": 4.351594665244861e-06, |
|
"loss": 1.2474, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.5563570784490532, |
|
"grad_norm": 1.321852801332939, |
|
"learning_rate": 4.337108783625837e-06, |
|
"loss": 1.2335, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.5572587917042381, |
|
"grad_norm": 1.29957738737894, |
|
"learning_rate": 4.322628563418958e-06, |
|
"loss": 1.2347, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.5581605049594229, |
|
"grad_norm": 1.2437967051806695, |
|
"learning_rate": 4.308154128292318e-06, |
|
"loss": 1.2319, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.5590622182146078, |
|
"grad_norm": 1.323952254471239, |
|
"learning_rate": 4.29368560186461e-06, |
|
"loss": 1.2393, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5599639314697926, |
|
"grad_norm": 1.3364243863997782, |
|
"learning_rate": 4.279223107704058e-06, |
|
"loss": 1.2353, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.5608656447249775, |
|
"grad_norm": 1.223403461559393, |
|
"learning_rate": 4.264766769327367e-06, |
|
"loss": 1.2218, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.5617673579801623, |
|
"grad_norm": 1.3309304196344736, |
|
"learning_rate": 4.2503167101986695e-06, |
|
"loss": 1.2183, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.5626690712353472, |
|
"grad_norm": 1.3334404043758776, |
|
"learning_rate": 4.235873053728475e-06, |
|
"loss": 1.2517, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.563570784490532, |
|
"grad_norm": 1.2850759662357756, |
|
"learning_rate": 4.221435923272606e-06, |
|
"loss": 1.2495, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5644724977457168, |
|
"grad_norm": 1.3029858639642806, |
|
"learning_rate": 4.207005442131151e-06, |
|
"loss": 1.2593, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.5653742110009017, |
|
"grad_norm": 1.2667764626567348, |
|
"learning_rate": 4.1925817335474095e-06, |
|
"loss": 1.2248, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.5662759242560865, |
|
"grad_norm": 1.2666696474712575, |
|
"learning_rate": 4.17816492070684e-06, |
|
"loss": 1.2328, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.5671776375112714, |
|
"grad_norm": 1.3025168642655727, |
|
"learning_rate": 4.163755126736011e-06, |
|
"loss": 1.243, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.5680793507664562, |
|
"grad_norm": 1.2526066400333988, |
|
"learning_rate": 4.149352474701545e-06, |
|
"loss": 1.2673, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5689810640216412, |
|
"grad_norm": 1.3106005532588625, |
|
"learning_rate": 4.134957087609065e-06, |
|
"loss": 1.2461, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.569882777276826, |
|
"grad_norm": 1.345734152807863, |
|
"learning_rate": 4.1205690884021506e-06, |
|
"loss": 1.2622, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.5707844905320109, |
|
"grad_norm": 1.2847763091271833, |
|
"learning_rate": 4.10618859996129e-06, |
|
"loss": 1.2491, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.5716862037871957, |
|
"grad_norm": 1.3398059744530983, |
|
"learning_rate": 4.091815745102818e-06, |
|
"loss": 1.2341, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.5725879170423805, |
|
"grad_norm": 1.279245025275653, |
|
"learning_rate": 4.077450646577881e-06, |
|
"loss": 1.2276, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5734896302975654, |
|
"grad_norm": 1.31103306359405, |
|
"learning_rate": 4.063093427071376e-06, |
|
"loss": 1.2622, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.5743913435527502, |
|
"grad_norm": 1.2869047741613928, |
|
"learning_rate": 4.048744209200918e-06, |
|
"loss": 1.2526, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.5752930568079351, |
|
"grad_norm": 1.262888543516136, |
|
"learning_rate": 4.034403115515778e-06, |
|
"loss": 1.2447, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.5761947700631199, |
|
"grad_norm": 1.318138827623911, |
|
"learning_rate": 4.020070268495844e-06, |
|
"loss": 1.2477, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.5770964833183048, |
|
"grad_norm": 1.3000660382986018, |
|
"learning_rate": 4.005745790550572e-06, |
|
"loss": 1.2348, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5779981965734896, |
|
"grad_norm": 1.3109955465621879, |
|
"learning_rate": 3.991429804017944e-06, |
|
"loss": 1.2437, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.5788999098286745, |
|
"grad_norm": 1.3230840693922976, |
|
"learning_rate": 3.9771224311634225e-06, |
|
"loss": 1.2466, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.5798016230838593, |
|
"grad_norm": 1.2911053004123727, |
|
"learning_rate": 3.962823794178902e-06, |
|
"loss": 1.2205, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.5807033363390441, |
|
"grad_norm": 1.29394302161919, |
|
"learning_rate": 3.948534015181671e-06, |
|
"loss": 1.2436, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.581605049594229, |
|
"grad_norm": 1.338827552196303, |
|
"learning_rate": 3.93425321621336e-06, |
|
"loss": 1.2487, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5825067628494139, |
|
"grad_norm": 1.2744443449332064, |
|
"learning_rate": 3.919981519238919e-06, |
|
"loss": 1.2182, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.5834084761045988, |
|
"grad_norm": 1.3142705170968756, |
|
"learning_rate": 3.905719046145551e-06, |
|
"loss": 1.2259, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.5843101893597836, |
|
"grad_norm": 1.3274687859286416, |
|
"learning_rate": 3.891465918741685e-06, |
|
"loss": 1.2403, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.5852119026149685, |
|
"grad_norm": 1.327857493053994, |
|
"learning_rate": 3.8772222587559345e-06, |
|
"loss": 1.2574, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.5861136158701533, |
|
"grad_norm": 1.3271105757469566, |
|
"learning_rate": 3.862988187836057e-06, |
|
"loss": 1.2588, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5870153291253382, |
|
"grad_norm": 1.3154262472743066, |
|
"learning_rate": 3.848763827547915e-06, |
|
"loss": 1.2378, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.587917042380523, |
|
"grad_norm": 1.3272260190880967, |
|
"learning_rate": 3.834549299374437e-06, |
|
"loss": 1.2258, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.5888187556357078, |
|
"grad_norm": 1.2955291861582168, |
|
"learning_rate": 3.8203447247145796e-06, |
|
"loss": 1.249, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.5897204688908927, |
|
"grad_norm": 1.2960283704822624, |
|
"learning_rate": 3.80615022488229e-06, |
|
"loss": 1.2142, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.5906221821460775, |
|
"grad_norm": 1.2663491385871244, |
|
"learning_rate": 3.7919659211054783e-06, |
|
"loss": 1.2421, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5915238954012624, |
|
"grad_norm": 1.256608347450423, |
|
"learning_rate": 3.7777919345249675e-06, |
|
"loss": 1.2287, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.5924256086564472, |
|
"grad_norm": 1.3082636395597274, |
|
"learning_rate": 3.763628386193471e-06, |
|
"loss": 1.2392, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.5933273219116321, |
|
"grad_norm": 1.2981758681506774, |
|
"learning_rate": 3.7494753970745536e-06, |
|
"loss": 1.2352, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.5942290351668169, |
|
"grad_norm": 1.3346817481845517, |
|
"learning_rate": 3.7353330880415963e-06, |
|
"loss": 1.215, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.5951307484220018, |
|
"grad_norm": 1.3310172482033298, |
|
"learning_rate": 3.721201579876775e-06, |
|
"loss": 1.2443, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5960324616771867, |
|
"grad_norm": 1.308881273250555, |
|
"learning_rate": 3.7070809932700134e-06, |
|
"loss": 1.2274, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.5969341749323716, |
|
"grad_norm": 1.2636903677769276, |
|
"learning_rate": 3.6929714488179617e-06, |
|
"loss": 1.243, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.5978358881875564, |
|
"grad_norm": 1.3444767242506068, |
|
"learning_rate": 3.6788730670229646e-06, |
|
"loss": 1.2254, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.5987376014427412, |
|
"grad_norm": 1.3019319337418664, |
|
"learning_rate": 3.664785968292036e-06, |
|
"loss": 1.2551, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.5996393146979261, |
|
"grad_norm": 1.3364310515081759, |
|
"learning_rate": 3.6507102729358224e-06, |
|
"loss": 1.2375, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.6005410279531109, |
|
"grad_norm": 1.303553692706673, |
|
"learning_rate": 3.6366461011675807e-06, |
|
"loss": 1.2352, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.6014427412082958, |
|
"grad_norm": 1.3241505676835355, |
|
"learning_rate": 3.622593573102153e-06, |
|
"loss": 1.2358, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.6023444544634806, |
|
"grad_norm": 1.3015882655381004, |
|
"learning_rate": 3.608552808754935e-06, |
|
"loss": 1.2414, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.6032461677186655, |
|
"grad_norm": 1.3187048120287344, |
|
"learning_rate": 3.5945239280408596e-06, |
|
"loss": 1.2241, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.6041478809738503, |
|
"grad_norm": 1.2909937470324393, |
|
"learning_rate": 3.580507050773363e-06, |
|
"loss": 1.2344, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6050495942290351, |
|
"grad_norm": 1.312803753965677, |
|
"learning_rate": 3.5665022966633678e-06, |
|
"loss": 1.2082, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.60595130748422, |
|
"grad_norm": 1.3273407315438335, |
|
"learning_rate": 3.552509785318258e-06, |
|
"loss": 1.2578, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.6068530207394048, |
|
"grad_norm": 1.2933303173914894, |
|
"learning_rate": 3.538529636240863e-06, |
|
"loss": 1.23, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.6077547339945897, |
|
"grad_norm": 1.2561311990765511, |
|
"learning_rate": 3.5245619688284277e-06, |
|
"loss": 1.22, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.6086564472497745, |
|
"grad_norm": 1.2956136355818522, |
|
"learning_rate": 3.510606902371598e-06, |
|
"loss": 1.2268, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.6095581605049594, |
|
"grad_norm": 1.2993861533198938, |
|
"learning_rate": 3.496664556053401e-06, |
|
"loss": 1.2594, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.6104598737601443, |
|
"grad_norm": 1.256934798888675, |
|
"learning_rate": 3.4827350489482324e-06, |
|
"loss": 1.2333, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.6113615870153292, |
|
"grad_norm": 1.2543400892427217, |
|
"learning_rate": 3.4688185000208297e-06, |
|
"loss": 1.228, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.612263300270514, |
|
"grad_norm": 1.3103327891219767, |
|
"learning_rate": 3.4549150281252635e-06, |
|
"loss": 1.2426, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.6131650135256989, |
|
"grad_norm": 1.3157993375630526, |
|
"learning_rate": 3.441024752003919e-06, |
|
"loss": 1.2386, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6140667267808837, |
|
"grad_norm": 1.3538482564231207, |
|
"learning_rate": 3.4271477902864836e-06, |
|
"loss": 1.2216, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.6149684400360685, |
|
"grad_norm": 1.269784948028544, |
|
"learning_rate": 3.413284261488935e-06, |
|
"loss": 1.2162, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.6158701532912534, |
|
"grad_norm": 1.2704213319719941, |
|
"learning_rate": 3.399434284012525e-06, |
|
"loss": 1.2372, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.6167718665464382, |
|
"grad_norm": 1.383306427504306, |
|
"learning_rate": 3.3855979761427705e-06, |
|
"loss": 1.2345, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.6176735798016231, |
|
"grad_norm": 1.268997641633319, |
|
"learning_rate": 3.3717754560484426e-06, |
|
"loss": 1.2465, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.6185752930568079, |
|
"grad_norm": 1.27764409089746, |
|
"learning_rate": 3.3579668417805643e-06, |
|
"loss": 1.2301, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.6194770063119928, |
|
"grad_norm": 1.2627803061282448, |
|
"learning_rate": 3.3441722512713893e-06, |
|
"loss": 1.2109, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.6203787195671776, |
|
"grad_norm": 1.2713892796187032, |
|
"learning_rate": 3.3303918023334024e-06, |
|
"loss": 1.2354, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.6212804328223624, |
|
"grad_norm": 1.3364846439846891, |
|
"learning_rate": 3.316625612658315e-06, |
|
"loss": 1.2017, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.6221821460775473, |
|
"grad_norm": 1.2827748136981727, |
|
"learning_rate": 3.302873799816054e-06, |
|
"loss": 1.2033, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6230838593327321, |
|
"grad_norm": 1.307953964844232, |
|
"learning_rate": 3.2891364812537686e-06, |
|
"loss": 1.2401, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.6239855725879171, |
|
"grad_norm": 1.3259218603144716, |
|
"learning_rate": 3.2754137742948113e-06, |
|
"loss": 1.2352, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.6248872858431019, |
|
"grad_norm": 1.229306275079686, |
|
"learning_rate": 3.2617057961377486e-06, |
|
"loss": 1.2558, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.6257889990982868, |
|
"grad_norm": 1.3009567818281342, |
|
"learning_rate": 3.2480126638553533e-06, |
|
"loss": 1.2514, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.6266907123534716, |
|
"grad_norm": 1.27577455658036, |
|
"learning_rate": 3.234334494393613e-06, |
|
"loss": 1.2358, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.6275924256086565, |
|
"grad_norm": 1.221813297508922, |
|
"learning_rate": 3.220671404570719e-06, |
|
"loss": 1.238, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.6284941388638413, |
|
"grad_norm": 1.2637233453633625, |
|
"learning_rate": 3.207023511076079e-06, |
|
"loss": 1.2434, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.6293958521190262, |
|
"grad_norm": 1.291554646733566, |
|
"learning_rate": 3.1933909304693144e-06, |
|
"loss": 1.2154, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.630297565374211, |
|
"grad_norm": 1.3178671426373603, |
|
"learning_rate": 3.1797737791792672e-06, |
|
"loss": 1.2352, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.6311992786293958, |
|
"grad_norm": 1.2872125571122581, |
|
"learning_rate": 3.1661721735030105e-06, |
|
"loss": 1.2354, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6321009918845807, |
|
"grad_norm": 1.313884831911044, |
|
"learning_rate": 3.1525862296048446e-06, |
|
"loss": 1.2376, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.6330027051397655, |
|
"grad_norm": 1.274735255975163, |
|
"learning_rate": 3.1390160635153123e-06, |
|
"loss": 1.2294, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.6339044183949504, |
|
"grad_norm": 1.2614372405167664, |
|
"learning_rate": 3.125461791130204e-06, |
|
"loss": 1.2428, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.6348061316501352, |
|
"grad_norm": 1.272361361972863, |
|
"learning_rate": 3.111923528209577e-06, |
|
"loss": 1.2573, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.6357078449053201, |
|
"grad_norm": 1.3029565371645733, |
|
"learning_rate": 3.098401390376755e-06, |
|
"loss": 1.2271, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.6366095581605049, |
|
"grad_norm": 1.2904996939383162, |
|
"learning_rate": 3.0848954931173437e-06, |
|
"loss": 1.2249, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.6375112714156899, |
|
"grad_norm": 1.2898690058485842, |
|
"learning_rate": 3.07140595177825e-06, |
|
"loss": 1.2266, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.6384129846708747, |
|
"grad_norm": 1.3509643247178318, |
|
"learning_rate": 3.0579328815666936e-06, |
|
"loss": 1.2469, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.6393146979260595, |
|
"grad_norm": 1.2971311382634418, |
|
"learning_rate": 3.044476397549221e-06, |
|
"loss": 1.2222, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.6402164111812444, |
|
"grad_norm": 1.3074374357170047, |
|
"learning_rate": 3.031036614650724e-06, |
|
"loss": 1.2324, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6411181244364292, |
|
"grad_norm": 1.320854679826083, |
|
"learning_rate": 3.017613647653461e-06, |
|
"loss": 1.2454, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.6420198376916141, |
|
"grad_norm": 1.2893653611762816, |
|
"learning_rate": 3.0042076111960718e-06, |
|
"loss": 1.2575, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.6429215509467989, |
|
"grad_norm": 1.2828409194239083, |
|
"learning_rate": 2.9908186197726043e-06, |
|
"loss": 1.2254, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.6438232642019838, |
|
"grad_norm": 1.258821924861263, |
|
"learning_rate": 2.977446787731532e-06, |
|
"loss": 1.2415, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.6447249774571686, |
|
"grad_norm": 1.3574129132624322, |
|
"learning_rate": 2.9640922292747785e-06, |
|
"loss": 1.2179, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.6456266907123535, |
|
"grad_norm": 1.3018397262453858, |
|
"learning_rate": 2.9507550584567413e-06, |
|
"loss": 1.2359, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.6465284039675383, |
|
"grad_norm": 1.3114784435553961, |
|
"learning_rate": 2.937435389183324e-06, |
|
"loss": 1.228, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.6474301172227231, |
|
"grad_norm": 1.2959756988384548, |
|
"learning_rate": 2.9241333352109535e-06, |
|
"loss": 1.2086, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.648331830477908, |
|
"grad_norm": 1.2799077133229382, |
|
"learning_rate": 2.910849010145617e-06, |
|
"loss": 1.2168, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.6492335437330928, |
|
"grad_norm": 1.2829633913200977, |
|
"learning_rate": 2.897582527441883e-06, |
|
"loss": 1.2191, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.6501352569882777, |
|
"grad_norm": 1.282434130038559, |
|
"learning_rate": 2.8843340004019427e-06, |
|
"loss": 1.2351, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.6510369702434626, |
|
"grad_norm": 1.3105069360123125, |
|
"learning_rate": 2.871103542174637e-06, |
|
"loss": 1.2283, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.6519386834986475, |
|
"grad_norm": 1.3345408753156254, |
|
"learning_rate": 2.857891265754489e-06, |
|
"loss": 1.234, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.6528403967538323, |
|
"grad_norm": 1.3442654702676227, |
|
"learning_rate": 2.8446972839807384e-06, |
|
"loss": 1.2216, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.6537421100090172, |
|
"grad_norm": 1.3564801233118708, |
|
"learning_rate": 2.831521709536382e-06, |
|
"loss": 1.2315, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.654643823264202, |
|
"grad_norm": 1.2746111775530709, |
|
"learning_rate": 2.818364654947211e-06, |
|
"loss": 1.2405, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.6555455365193869, |
|
"grad_norm": 1.2888789023115854, |
|
"learning_rate": 2.8052262325808466e-06, |
|
"loss": 1.1947, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.6564472497745717, |
|
"grad_norm": 1.297464906154778, |
|
"learning_rate": 2.7921065546457773e-06, |
|
"loss": 1.222, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.6573489630297565, |
|
"grad_norm": 1.3127768408774596, |
|
"learning_rate": 2.779005733190412e-06, |
|
"loss": 1.2199, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.6582506762849414, |
|
"grad_norm": 1.3191258871353029, |
|
"learning_rate": 2.7659238801021105e-06, |
|
"loss": 1.2365, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6591523895401262, |
|
"grad_norm": 1.3023738457626162, |
|
"learning_rate": 2.7528611071062366e-06, |
|
"loss": 1.2262, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.6600541027953111, |
|
"grad_norm": 1.2807206465971785, |
|
"learning_rate": 2.7398175257652036e-06, |
|
"loss": 1.2256, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.6609558160504959, |
|
"grad_norm": 1.2660732605347753, |
|
"learning_rate": 2.7267932474775115e-06, |
|
"loss": 1.192, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.6618575293056808, |
|
"grad_norm": 1.3114174256047686, |
|
"learning_rate": 2.7137883834768076e-06, |
|
"loss": 1.2397, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.6627592425608656, |
|
"grad_norm": 1.3406088823582483, |
|
"learning_rate": 2.7008030448309318e-06, |
|
"loss": 1.2103, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.6636609558160504, |
|
"grad_norm": 1.2614996055747296, |
|
"learning_rate": 2.6878373424409705e-06, |
|
"loss": 1.2365, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.6645626690712354, |
|
"grad_norm": 1.2732704037998983, |
|
"learning_rate": 2.674891387040298e-06, |
|
"loss": 1.2243, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.6654643823264202, |
|
"grad_norm": 1.3360075379083336, |
|
"learning_rate": 2.66196528919365e-06, |
|
"loss": 1.2478, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.6663660955816051, |
|
"grad_norm": 1.3054505998468804, |
|
"learning_rate": 2.649059159296158e-06, |
|
"loss": 1.254, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.6672678088367899, |
|
"grad_norm": 1.2942891584243765, |
|
"learning_rate": 2.6361731075724327e-06, |
|
"loss": 1.2153, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6681695220919748, |
|
"grad_norm": 1.3109493986388532, |
|
"learning_rate": 2.6233072440755934e-06, |
|
"loss": 1.2328, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.6690712353471596, |
|
"grad_norm": 1.3327195437209476, |
|
"learning_rate": 2.6104616786863507e-06, |
|
"loss": 1.2199, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.6699729486023445, |
|
"grad_norm": 1.2821126629709811, |
|
"learning_rate": 2.597636521112053e-06, |
|
"loss": 1.2045, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.6708746618575293, |
|
"grad_norm": 1.320388296773102, |
|
"learning_rate": 2.584831880885761e-06, |
|
"loss": 1.2243, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.6717763751127142, |
|
"grad_norm": 1.294843994895677, |
|
"learning_rate": 2.572047867365308e-06, |
|
"loss": 1.2069, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.672678088367899, |
|
"grad_norm": 1.3064477589046204, |
|
"learning_rate": 2.5592845897323596e-06, |
|
"loss": 1.2158, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.6735798016230838, |
|
"grad_norm": 1.3208905098729207, |
|
"learning_rate": 2.5465421569914916e-06, |
|
"loss": 1.2459, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.6744815148782687, |
|
"grad_norm": 1.278779451830435, |
|
"learning_rate": 2.5338206779692536e-06, |
|
"loss": 1.2359, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.6753832281334535, |
|
"grad_norm": 1.254543313346981, |
|
"learning_rate": 2.5211202613132413e-06, |
|
"loss": 1.1942, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.6762849413886384, |
|
"grad_norm": 1.3400421358634278, |
|
"learning_rate": 2.508441015491162e-06, |
|
"loss": 1.2401, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6771866546438232, |
|
"grad_norm": 1.3576541062217489, |
|
"learning_rate": 2.4957830487899224e-06, |
|
"loss": 1.2319, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.6780883678990082, |
|
"grad_norm": 1.3010818454018325, |
|
"learning_rate": 2.4831464693146845e-06, |
|
"loss": 1.2321, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.678990081154193, |
|
"grad_norm": 1.270217200357556, |
|
"learning_rate": 2.4705313849879663e-06, |
|
"loss": 1.2109, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.6798917944093779, |
|
"grad_norm": 1.2871254174671725, |
|
"learning_rate": 2.457937903548695e-06, |
|
"loss": 1.2403, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.6807935076645627, |
|
"grad_norm": 1.267863712970668, |
|
"learning_rate": 2.4453661325513065e-06, |
|
"loss": 1.2247, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.6816952209197475, |
|
"grad_norm": 1.3253294626486456, |
|
"learning_rate": 2.4328161793648126e-06, |
|
"loss": 1.2333, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.6825969341749324, |
|
"grad_norm": 1.2949613306417762, |
|
"learning_rate": 2.420288151171895e-06, |
|
"loss": 1.2199, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.6834986474301172, |
|
"grad_norm": 1.2894731802418073, |
|
"learning_rate": 2.407782154967986e-06, |
|
"loss": 1.1996, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.6844003606853021, |
|
"grad_norm": 1.3266207348050199, |
|
"learning_rate": 2.3952982975603494e-06, |
|
"loss": 1.2265, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.6853020739404869, |
|
"grad_norm": 1.3404362571181616, |
|
"learning_rate": 2.382836685567178e-06, |
|
"loss": 1.2187, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6862037871956718, |
|
"grad_norm": 1.2776517904799676, |
|
"learning_rate": 2.3703974254166704e-06, |
|
"loss": 1.227, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.6871055004508566, |
|
"grad_norm": 1.2838876524156215, |
|
"learning_rate": 2.357980623346143e-06, |
|
"loss": 1.2177, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.6880072137060415, |
|
"grad_norm": 1.2784263173497654, |
|
"learning_rate": 2.345586385401094e-06, |
|
"loss": 1.2218, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.6889089269612263, |
|
"grad_norm": 1.279430229152187, |
|
"learning_rate": 2.3332148174343257e-06, |
|
"loss": 1.2392, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.6898106402164111, |
|
"grad_norm": 1.2520563299524021, |
|
"learning_rate": 2.320866025105016e-06, |
|
"loss": 1.2092, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.690712353471596, |
|
"grad_norm": 1.3149467895203844, |
|
"learning_rate": 2.3085401138778414e-06, |
|
"loss": 1.2338, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.6916140667267808, |
|
"grad_norm": 1.2916597985967335, |
|
"learning_rate": 2.2962371890220502e-06, |
|
"loss": 1.2229, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.6925157799819658, |
|
"grad_norm": 1.3252286984828274, |
|
"learning_rate": 2.283957355610584e-06, |
|
"loss": 1.2095, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.6934174932371506, |
|
"grad_norm": 1.3270969648402997, |
|
"learning_rate": 2.2717007185191673e-06, |
|
"loss": 1.2239, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.6943192064923355, |
|
"grad_norm": 1.3262507945691961, |
|
"learning_rate": 2.25946738242542e-06, |
|
"loss": 1.221, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6952209197475203, |
|
"grad_norm": 1.2744476798628572, |
|
"learning_rate": 2.247257451807961e-06, |
|
"loss": 1.2095, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.6961226330027052, |
|
"grad_norm": 1.257677104351814, |
|
"learning_rate": 2.235071030945509e-06, |
|
"loss": 1.2343, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.69702434625789, |
|
"grad_norm": 1.273279317005678, |
|
"learning_rate": 2.2229082239160066e-06, |
|
"loss": 1.2096, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.6979260595130748, |
|
"grad_norm": 1.3275403323151511, |
|
"learning_rate": 2.2107691345957133e-06, |
|
"loss": 1.2223, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.6988277727682597, |
|
"grad_norm": 1.2994686903488226, |
|
"learning_rate": 2.198653866658339e-06, |
|
"loss": 1.2383, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6997294860234445, |
|
"grad_norm": 1.2946263968131735, |
|
"learning_rate": 2.1865625235741376e-06, |
|
"loss": 1.2316, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.7006311992786294, |
|
"grad_norm": 1.3260718116411006, |
|
"learning_rate": 2.1744952086090396e-06, |
|
"loss": 1.1987, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.7015329125338142, |
|
"grad_norm": 1.2928907842117559, |
|
"learning_rate": 2.162452024823758e-06, |
|
"loss": 1.2327, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.7024346257889991, |
|
"grad_norm": 1.3022114442202848, |
|
"learning_rate": 2.1504330750729185e-06, |
|
"loss": 1.2048, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.7033363390441839, |
|
"grad_norm": 1.2680584448769776, |
|
"learning_rate": 2.1384384620041756e-06, |
|
"loss": 1.2022, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7042380522993688, |
|
"grad_norm": 1.2980425468161858, |
|
"learning_rate": 2.1264682880573374e-06, |
|
"loss": 1.2112, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.7051397655545536, |
|
"grad_norm": 1.3239977283045519, |
|
"learning_rate": 2.1145226554634845e-06, |
|
"loss": 1.2105, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.7060414788097386, |
|
"grad_norm": 1.2758563831905616, |
|
"learning_rate": 2.1026016662441097e-06, |
|
"loss": 1.2347, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.7069431920649234, |
|
"grad_norm": 1.2413307004498162, |
|
"learning_rate": 2.0907054222102367e-06, |
|
"loss": 1.2359, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.7078449053201082, |
|
"grad_norm": 1.2797262934378604, |
|
"learning_rate": 2.0788340249615506e-06, |
|
"loss": 1.2328, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.7087466185752931, |
|
"grad_norm": 1.2921593019754436, |
|
"learning_rate": 2.066987575885539e-06, |
|
"loss": 1.222, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.7096483318304779, |
|
"grad_norm": 1.3244519054779904, |
|
"learning_rate": 2.0551661761566104e-06, |
|
"loss": 1.2137, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.7105500450856628, |
|
"grad_norm": 1.2995133181578151, |
|
"learning_rate": 2.0433699267352536e-06, |
|
"loss": 1.2238, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.7114517583408476, |
|
"grad_norm": 1.3071033235317082, |
|
"learning_rate": 2.0315989283671474e-06, |
|
"loss": 1.199, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.7123534715960325, |
|
"grad_norm": 1.2721249987745593, |
|
"learning_rate": 2.0198532815823247e-06, |
|
"loss": 1.1905, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7132551848512173, |
|
"grad_norm": 1.322827431541093, |
|
"learning_rate": 2.0081330866942962e-06, |
|
"loss": 1.2146, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.7141568981064021, |
|
"grad_norm": 1.3139334735005088, |
|
"learning_rate": 1.9964384437992055e-06, |
|
"loss": 1.2415, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.715058611361587, |
|
"grad_norm": 1.2680789806345396, |
|
"learning_rate": 1.98476945277497e-06, |
|
"loss": 1.2181, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.7159603246167718, |
|
"grad_norm": 1.2428341282201179, |
|
"learning_rate": 1.9731262132804275e-06, |
|
"loss": 1.2195, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.7168620378719567, |
|
"grad_norm": 1.3004003311062884, |
|
"learning_rate": 1.9615088247544802e-06, |
|
"loss": 1.223, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.7177637511271415, |
|
"grad_norm": 1.313334117650514, |
|
"learning_rate": 1.9499173864152566e-06, |
|
"loss": 1.2185, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.7186654643823264, |
|
"grad_norm": 1.3238878316428104, |
|
"learning_rate": 1.938351997259258e-06, |
|
"loss": 1.2319, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.7195671776375113, |
|
"grad_norm": 1.3043461142181643, |
|
"learning_rate": 1.926812756060508e-06, |
|
"loss": 1.23, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.7204688908926962, |
|
"grad_norm": 1.2858820326661842, |
|
"learning_rate": 1.9152997613697184e-06, |
|
"loss": 1.1903, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.721370604147881, |
|
"grad_norm": 1.3291400806149936, |
|
"learning_rate": 1.9038131115134401e-06, |
|
"loss": 1.2137, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7222723174030659, |
|
"grad_norm": 1.2976270941930153, |
|
"learning_rate": 1.8923529045932292e-06, |
|
"loss": 1.2037, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.7231740306582507, |
|
"grad_norm": 1.280008791018806, |
|
"learning_rate": 1.8809192384848046e-06, |
|
"loss": 1.2346, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.7240757439134355, |
|
"grad_norm": 1.2520323037262666, |
|
"learning_rate": 1.8695122108372166e-06, |
|
"loss": 1.2157, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.7249774571686204, |
|
"grad_norm": 1.308194040096133, |
|
"learning_rate": 1.8581319190720038e-06, |
|
"loss": 1.2231, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.7258791704238052, |
|
"grad_norm": 1.2796364986114368, |
|
"learning_rate": 1.8467784603823736e-06, |
|
"loss": 1.2192, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.7267808836789901, |
|
"grad_norm": 1.2872786078348708, |
|
"learning_rate": 1.8354519317323632e-06, |
|
"loss": 1.2399, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.7276825969341749, |
|
"grad_norm": 1.2784211379965313, |
|
"learning_rate": 1.824152429856017e-06, |
|
"loss": 1.2403, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.7285843101893598, |
|
"grad_norm": 1.310396993012597, |
|
"learning_rate": 1.8128800512565514e-06, |
|
"loss": 1.2277, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.7294860234445446, |
|
"grad_norm": 1.2612581875182598, |
|
"learning_rate": 1.8016348922055448e-06, |
|
"loss": 1.2311, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.7303877366997295, |
|
"grad_norm": 1.3520209311069702, |
|
"learning_rate": 1.7904170487421002e-06, |
|
"loss": 1.2131, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7312894499549143, |
|
"grad_norm": 1.3254416365883752, |
|
"learning_rate": 1.7792266166720368e-06, |
|
"loss": 1.2083, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.7321911632100991, |
|
"grad_norm": 1.308839493950598, |
|
"learning_rate": 1.7680636915670673e-06, |
|
"loss": 1.2397, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.7330928764652841, |
|
"grad_norm": 1.2861647653716632, |
|
"learning_rate": 1.7569283687639782e-06, |
|
"loss": 1.2047, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.7339945897204689, |
|
"grad_norm": 1.3102171488736987, |
|
"learning_rate": 1.7458207433638225e-06, |
|
"loss": 1.238, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.7348963029756538, |
|
"grad_norm": 1.279839582732384, |
|
"learning_rate": 1.7347409102311013e-06, |
|
"loss": 1.2363, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.7357980162308386, |
|
"grad_norm": 1.2513051547872285, |
|
"learning_rate": 1.7236889639929604e-06, |
|
"loss": 1.2206, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.7366997294860235, |
|
"grad_norm": 1.2571979965399165, |
|
"learning_rate": 1.712664999038372e-06, |
|
"loss": 1.2321, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.7376014427412083, |
|
"grad_norm": 1.2789411825150419, |
|
"learning_rate": 1.7016691095173398e-06, |
|
"loss": 1.226, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.7385031559963932, |
|
"grad_norm": 1.3081374507526442, |
|
"learning_rate": 1.6907013893400838e-06, |
|
"loss": 1.2483, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.739404869251578, |
|
"grad_norm": 1.3048788538202847, |
|
"learning_rate": 1.6797619321762531e-06, |
|
"loss": 1.199, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7403065825067628, |
|
"grad_norm": 1.2811961903485563, |
|
"learning_rate": 1.6688508314541086e-06, |
|
"loss": 1.2262, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.7412082957619477, |
|
"grad_norm": 1.3386620335025967, |
|
"learning_rate": 1.6579681803597392e-06, |
|
"loss": 1.2517, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.7421100090171325, |
|
"grad_norm": 1.2968806903374512, |
|
"learning_rate": 1.6471140718362538e-06, |
|
"loss": 1.2066, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.7430117222723174, |
|
"grad_norm": 1.2752349391240716, |
|
"learning_rate": 1.6362885985830001e-06, |
|
"loss": 1.2239, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.7439134355275022, |
|
"grad_norm": 1.3261984374711637, |
|
"learning_rate": 1.6254918530547663e-06, |
|
"loss": 1.1986, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.7448151487826871, |
|
"grad_norm": 1.2550706964991916, |
|
"learning_rate": 1.6147239274609865e-06, |
|
"loss": 1.2283, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.7457168620378719, |
|
"grad_norm": 1.2924267601556008, |
|
"learning_rate": 1.6039849137649633e-06, |
|
"loss": 1.2284, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.7466185752930569, |
|
"grad_norm": 1.2625304550791376, |
|
"learning_rate": 1.593274903683077e-06, |
|
"loss": 1.2056, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.7475202885482417, |
|
"grad_norm": 1.2690837771273074, |
|
"learning_rate": 1.5825939886840036e-06, |
|
"loss": 1.2255, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.7484220018034266, |
|
"grad_norm": 1.2762767081796036, |
|
"learning_rate": 1.571942259987929e-06, |
|
"loss": 1.2353, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7493237150586114, |
|
"grad_norm": 1.2727759423442815, |
|
"learning_rate": 1.5613198085657804e-06, |
|
"loss": 1.2143, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.7502254283137962, |
|
"grad_norm": 1.3036977265961338, |
|
"learning_rate": 1.5507267251384334e-06, |
|
"loss": 1.206, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.7511271415689811, |
|
"grad_norm": 1.2740183699658059, |
|
"learning_rate": 1.5401631001759604e-06, |
|
"loss": 1.2408, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.7520288548241659, |
|
"grad_norm": 1.2799961121807295, |
|
"learning_rate": 1.5296290238968303e-06, |
|
"loss": 1.2259, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.7529305680793508, |
|
"grad_norm": 1.2946371850023939, |
|
"learning_rate": 1.5191245862671627e-06, |
|
"loss": 1.2378, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.7538322813345356, |
|
"grad_norm": 1.2736534447504666, |
|
"learning_rate": 1.5086498769999397e-06, |
|
"loss": 1.2069, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.7547339945897205, |
|
"grad_norm": 1.2931271877296926, |
|
"learning_rate": 1.4982049855542553e-06, |
|
"loss": 1.2431, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.7556357078449053, |
|
"grad_norm": 1.2741459793452181, |
|
"learning_rate": 1.4877900011345442e-06, |
|
"loss": 1.2203, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.7565374211000901, |
|
"grad_norm": 1.2558676126546313, |
|
"learning_rate": 1.4774050126898164e-06, |
|
"loss": 1.2137, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.757439134355275, |
|
"grad_norm": 1.2745001559561364, |
|
"learning_rate": 1.4670501089129075e-06, |
|
"loss": 1.2066, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7583408476104598, |
|
"grad_norm": 1.2911834530742523, |
|
"learning_rate": 1.4567253782397073e-06, |
|
"loss": 1.2179, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.7592425608656447, |
|
"grad_norm": 1.274307360634065, |
|
"learning_rate": 1.4464309088484252e-06, |
|
"loss": 1.2313, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.7601442741208295, |
|
"grad_norm": 1.2923011502987385, |
|
"learning_rate": 1.4361667886588116e-06, |
|
"loss": 1.1962, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.7610459873760145, |
|
"grad_norm": 1.301882038769701, |
|
"learning_rate": 1.425933105331429e-06, |
|
"loss": 1.2223, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.7619477006311993, |
|
"grad_norm": 1.2863546659971987, |
|
"learning_rate": 1.4157299462668872e-06, |
|
"loss": 1.2043, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.7628494138863842, |
|
"grad_norm": 1.3053223183086544, |
|
"learning_rate": 1.4055573986051125e-06, |
|
"loss": 1.2321, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.763751127141569, |
|
"grad_norm": 1.3159435801199877, |
|
"learning_rate": 1.395415549224587e-06, |
|
"loss": 1.211, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.7646528403967539, |
|
"grad_norm": 1.2851907687256028, |
|
"learning_rate": 1.3853044847416208e-06, |
|
"loss": 1.2144, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.7655545536519387, |
|
"grad_norm": 1.2799253757664457, |
|
"learning_rate": 1.3752242915095993e-06, |
|
"loss": 1.2162, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.7664562669071235, |
|
"grad_norm": 1.2875109268543516, |
|
"learning_rate": 1.3651750556182586e-06, |
|
"loss": 1.2125, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7673579801623084, |
|
"grad_norm": 1.3080584590334174, |
|
"learning_rate": 1.3551568628929434e-06, |
|
"loss": 1.225, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.7682596934174932, |
|
"grad_norm": 1.2860096387559667, |
|
"learning_rate": 1.34516979889387e-06, |
|
"loss": 1.2079, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.7691614066726781, |
|
"grad_norm": 1.235344160104314, |
|
"learning_rate": 1.3352139489154064e-06, |
|
"loss": 1.2131, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.7700631199278629, |
|
"grad_norm": 1.327651942534106, |
|
"learning_rate": 1.3252893979853304e-06, |
|
"loss": 1.2, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.7709648331830478, |
|
"grad_norm": 1.2568962321346648, |
|
"learning_rate": 1.315396230864121e-06, |
|
"loss": 1.2499, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.7718665464382326, |
|
"grad_norm": 1.3369013029699717, |
|
"learning_rate": 1.3055345320442142e-06, |
|
"loss": 1.2521, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.7727682596934174, |
|
"grad_norm": 1.3204783254303465, |
|
"learning_rate": 1.295704385749299e-06, |
|
"loss": 1.2109, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.7736699729486023, |
|
"grad_norm": 1.250606473191615, |
|
"learning_rate": 1.2859058759335835e-06, |
|
"loss": 1.2117, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.7745716862037872, |
|
"grad_norm": 1.2629471608700342, |
|
"learning_rate": 1.2761390862810907e-06, |
|
"loss": 1.2066, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.7754733994589721, |
|
"grad_norm": 1.3409577687424445, |
|
"learning_rate": 1.2664041002049366e-06, |
|
"loss": 1.2136, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7763751127141569, |
|
"grad_norm": 1.2868230389731257, |
|
"learning_rate": 1.256701000846619e-06, |
|
"loss": 1.1905, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.7772768259693418, |
|
"grad_norm": 1.2406015334116862, |
|
"learning_rate": 1.2470298710753047e-06, |
|
"loss": 1.2296, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.7781785392245266, |
|
"grad_norm": 1.2811225594832343, |
|
"learning_rate": 1.2373907934871292e-06, |
|
"loss": 1.2087, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.7790802524797115, |
|
"grad_norm": 1.2846826323381735, |
|
"learning_rate": 1.227783850404487e-06, |
|
"loss": 1.2182, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.7799819657348963, |
|
"grad_norm": 1.2634245507700415, |
|
"learning_rate": 1.218209123875323e-06, |
|
"loss": 1.2383, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.7808836789900812, |
|
"grad_norm": 1.3138866338710329, |
|
"learning_rate": 1.2086666956724425e-06, |
|
"loss": 1.2467, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.781785392245266, |
|
"grad_norm": 1.3186911496412215, |
|
"learning_rate": 1.1991566472928028e-06, |
|
"loss": 1.2289, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.7826871055004508, |
|
"grad_norm": 1.2884060019272627, |
|
"learning_rate": 1.1896790599568291e-06, |
|
"loss": 1.2203, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.7835888187556357, |
|
"grad_norm": 1.3059275711703233, |
|
"learning_rate": 1.1802340146077045e-06, |
|
"loss": 1.2169, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.7844905320108205, |
|
"grad_norm": 1.301415764999824, |
|
"learning_rate": 1.1708215919106963e-06, |
|
"loss": 1.2373, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7853922452660054, |
|
"grad_norm": 1.2923142951378839, |
|
"learning_rate": 1.1614418722524506e-06, |
|
"loss": 1.2093, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.7862939585211902, |
|
"grad_norm": 1.3064111928829703, |
|
"learning_rate": 1.1520949357403194e-06, |
|
"loss": 1.2056, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.7871956717763751, |
|
"grad_norm": 1.2810652585045075, |
|
"learning_rate": 1.1427808622016683e-06, |
|
"loss": 1.2287, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.78809738503156, |
|
"grad_norm": 1.2914490392277977, |
|
"learning_rate": 1.1334997311832003e-06, |
|
"loss": 1.2412, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.7889990982867449, |
|
"grad_norm": 1.255451413387033, |
|
"learning_rate": 1.1242516219502663e-06, |
|
"loss": 1.2131, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.7899008115419297, |
|
"grad_norm": 1.2556143337911658, |
|
"learning_rate": 1.1150366134862033e-06, |
|
"loss": 1.2126, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.7908025247971145, |
|
"grad_norm": 1.3313063769408204, |
|
"learning_rate": 1.105854784491648e-06, |
|
"loss": 1.2468, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.7917042380522994, |
|
"grad_norm": 1.298214254858563, |
|
"learning_rate": 1.0967062133838658e-06, |
|
"loss": 1.2137, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.7926059513074842, |
|
"grad_norm": 1.2746933883075344, |
|
"learning_rate": 1.0875909782960887e-06, |
|
"loss": 1.2039, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.7935076645626691, |
|
"grad_norm": 1.3540595796355972, |
|
"learning_rate": 1.0785091570768386e-06, |
|
"loss": 1.2191, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.7944093778178539, |
|
"grad_norm": 1.3563137733418598, |
|
"learning_rate": 1.0694608272892698e-06, |
|
"loss": 1.2376, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.7953110910730388, |
|
"grad_norm": 1.2647689876029176, |
|
"learning_rate": 1.0604460662105022e-06, |
|
"loss": 1.1925, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.7962128043282236, |
|
"grad_norm": 1.3125086631687228, |
|
"learning_rate": 1.0514649508309642e-06, |
|
"loss": 1.2144, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.7971145175834085, |
|
"grad_norm": 1.2897071180116173, |
|
"learning_rate": 1.04251755785373e-06, |
|
"loss": 1.2244, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.7980162308385933, |
|
"grad_norm": 1.283139531262602, |
|
"learning_rate": 1.0336039636938716e-06, |
|
"loss": 1.1859, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.7989179440937781, |
|
"grad_norm": 1.2765723038043117, |
|
"learning_rate": 1.024724244477801e-06, |
|
"loss": 1.209, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.799819657348963, |
|
"grad_norm": 1.29015886657531, |
|
"learning_rate": 1.0158784760426243e-06, |
|
"loss": 1.2101, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.8007213706041478, |
|
"grad_norm": 1.2589137070190157, |
|
"learning_rate": 1.0070667339354873e-06, |
|
"loss": 1.207, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.8016230838593328, |
|
"grad_norm": 1.277316552734332, |
|
"learning_rate": 9.98289093412938e-07, |
|
"loss": 1.2457, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.8025247971145176, |
|
"grad_norm": 1.3008418492196654, |
|
"learning_rate": 9.895456294402778e-07, |
|
"loss": 1.2113, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8034265103697025, |
|
"grad_norm": 1.2807960216818002, |
|
"learning_rate": 9.808364166909256e-07, |
|
"loss": 1.197, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.8043282236248873, |
|
"grad_norm": 1.2564207138926697, |
|
"learning_rate": 9.721615295457775e-07, |
|
"loss": 1.1898, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.8052299368800722, |
|
"grad_norm": 1.3041160534809693, |
|
"learning_rate": 9.63521042092575e-07, |
|
"loss": 1.2209, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.806131650135257, |
|
"grad_norm": 1.294351117986239, |
|
"learning_rate": 9.549150281252633e-07, |
|
"loss": 1.2086, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.8070333633904418, |
|
"grad_norm": 1.2894951265659893, |
|
"learning_rate": 9.46343561143373e-07, |
|
"loss": 1.1988, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.8079350766456267, |
|
"grad_norm": 1.2872240658238072, |
|
"learning_rate": 9.378067143513858e-07, |
|
"loss": 1.227, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.8088367899008115, |
|
"grad_norm": 1.2773245018646944, |
|
"learning_rate": 9.29304560658107e-07, |
|
"loss": 1.2261, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.8097385031559964, |
|
"grad_norm": 1.240601055718308, |
|
"learning_rate": 9.20837172676049e-07, |
|
"loss": 1.2217, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.8106402164111812, |
|
"grad_norm": 1.321794367808805, |
|
"learning_rate": 9.124046227208083e-07, |
|
"loss": 1.1978, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.8115419296663661, |
|
"grad_norm": 1.3149000543559988, |
|
"learning_rate": 9.040069828104475e-07, |
|
"loss": 1.229, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8124436429215509, |
|
"grad_norm": 1.3125901611372035, |
|
"learning_rate": 8.956443246648771e-07, |
|
"loss": 1.2368, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.8133453561767358, |
|
"grad_norm": 1.2785850919554989, |
|
"learning_rate": 8.873167197052529e-07, |
|
"loss": 1.2306, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.8142470694319206, |
|
"grad_norm": 1.3000250879578916, |
|
"learning_rate": 8.790242390533521e-07, |
|
"loss": 1.225, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.8151487826871056, |
|
"grad_norm": 1.2818311674334737, |
|
"learning_rate": 8.707669535309793e-07, |
|
"loss": 1.2047, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.8160504959422904, |
|
"grad_norm": 1.271805790254973, |
|
"learning_rate": 8.625449336593522e-07, |
|
"loss": 1.2172, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.8169522091974752, |
|
"grad_norm": 1.2761977162628635, |
|
"learning_rate": 8.543582496585063e-07, |
|
"loss": 1.1918, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.8178539224526601, |
|
"grad_norm": 1.3410080816109553, |
|
"learning_rate": 8.462069714466858e-07, |
|
"loss": 1.22, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.8187556357078449, |
|
"grad_norm": 1.2521910265828438, |
|
"learning_rate": 8.380911686397581e-07, |
|
"loss": 1.2199, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.8196573489630298, |
|
"grad_norm": 1.3638833178851848, |
|
"learning_rate": 8.30010910550611e-07, |
|
"loss": 1.2307, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.8205590622182146, |
|
"grad_norm": 1.2852248499047008, |
|
"learning_rate": 8.219662661885619e-07, |
|
"loss": 1.2033, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.8214607754733995, |
|
"grad_norm": 1.2652628132587298, |
|
"learning_rate": 8.139573042587729e-07, |
|
"loss": 1.2028, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.8223624887285843, |
|
"grad_norm": 1.2762952391981852, |
|
"learning_rate": 8.059840931616558e-07, |
|
"loss": 1.1733, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.8232642019837692, |
|
"grad_norm": 1.2774076567333978, |
|
"learning_rate": 7.980467009923009e-07, |
|
"loss": 1.2039, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.824165915238954, |
|
"grad_norm": 1.3040355830465697, |
|
"learning_rate": 7.901451955398792e-07, |
|
"loss": 1.2161, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.8250676284941388, |
|
"grad_norm": 1.3095023386402835, |
|
"learning_rate": 7.822796442870784e-07, |
|
"loss": 1.2345, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.8259693417493237, |
|
"grad_norm": 1.28427660003993, |
|
"learning_rate": 7.744501144095135e-07, |
|
"loss": 1.2107, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.8268710550045085, |
|
"grad_norm": 1.2878227831037923, |
|
"learning_rate": 7.666566727751645e-07, |
|
"loss": 1.211, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.8277727682596934, |
|
"grad_norm": 1.3124410623046319, |
|
"learning_rate": 7.588993859437988e-07, |
|
"loss": 1.2459, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.8286744815148782, |
|
"grad_norm": 1.263563579523678, |
|
"learning_rate": 7.511783201664053e-07, |
|
"loss": 1.204, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.8295761947700632, |
|
"grad_norm": 1.2767223067970443, |
|
"learning_rate": 7.434935413846245e-07, |
|
"loss": 1.2043, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.830477908025248, |
|
"grad_norm": 1.27323540409098, |
|
"learning_rate": 7.35845115230191e-07, |
|
"loss": 1.1902, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.8313796212804329, |
|
"grad_norm": 1.2856585107991603, |
|
"learning_rate": 7.282331070243703e-07, |
|
"loss": 1.214, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.8322813345356177, |
|
"grad_norm": 1.2777930103434787, |
|
"learning_rate": 7.206575817773992e-07, |
|
"loss": 1.2162, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.8331830477908025, |
|
"grad_norm": 1.2695250453108164, |
|
"learning_rate": 7.131186041879357e-07, |
|
"loss": 1.206, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.8340847610459874, |
|
"grad_norm": 1.2670670761716276, |
|
"learning_rate": 7.056162386424964e-07, |
|
"loss": 1.199, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.8349864743011722, |
|
"grad_norm": 1.2738982457981094, |
|
"learning_rate": 6.981505492149232e-07, |
|
"loss": 1.1969, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.8358881875563571, |
|
"grad_norm": 1.264231758299848, |
|
"learning_rate": 6.907215996658174e-07, |
|
"loss": 1.2045, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.8367899008115419, |
|
"grad_norm": 1.2954200749690095, |
|
"learning_rate": 6.833294534420093e-07, |
|
"loss": 1.2117, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.8376916140667268, |
|
"grad_norm": 1.401407562366026, |
|
"learning_rate": 6.759741736760062e-07, |
|
"loss": 1.2149, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.8385933273219116, |
|
"grad_norm": 1.2545881088743782, |
|
"learning_rate": 6.686558231854634e-07, |
|
"loss": 1.1956, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8394950405770965, |
|
"grad_norm": 1.2878052921321332, |
|
"learning_rate": 6.613744644726383e-07, |
|
"loss": 1.2128, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.8403967538322813, |
|
"grad_norm": 1.2746377137243443, |
|
"learning_rate": 6.541301597238636e-07, |
|
"loss": 1.2344, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.8412984670874661, |
|
"grad_norm": 1.258707795620394, |
|
"learning_rate": 6.469229708090091e-07, |
|
"loss": 1.2212, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.842200180342651, |
|
"grad_norm": 1.272285625648656, |
|
"learning_rate": 6.397529592809615e-07, |
|
"loss": 1.2071, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.8431018935978359, |
|
"grad_norm": 1.2769280073303368, |
|
"learning_rate": 6.326201863750942e-07, |
|
"loss": 1.2162, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.8440036068530208, |
|
"grad_norm": 1.281458182940483, |
|
"learning_rate": 6.255247130087405e-07, |
|
"loss": 1.2103, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.8449053201082056, |
|
"grad_norm": 1.2921797567709183, |
|
"learning_rate": 6.184665997806832e-07, |
|
"loss": 1.2108, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.8458070333633905, |
|
"grad_norm": 1.2846162043149028, |
|
"learning_rate": 6.114459069706252e-07, |
|
"loss": 1.2147, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.8467087466185753, |
|
"grad_norm": 1.310085155671471, |
|
"learning_rate": 6.044626945386894e-07, |
|
"loss": 1.2141, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.8476104598737602, |
|
"grad_norm": 1.2712954656833793, |
|
"learning_rate": 5.975170221248894e-07, |
|
"loss": 1.2311, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.848512173128945, |
|
"grad_norm": 1.2955924380936459, |
|
"learning_rate": 5.90608949048635e-07, |
|
"loss": 1.2223, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.8494138863841298, |
|
"grad_norm": 1.3049752396017495, |
|
"learning_rate": 5.837385343082152e-07, |
|
"loss": 1.2381, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.8503155996393147, |
|
"grad_norm": 1.2931102898743785, |
|
"learning_rate": 5.769058365803016e-07, |
|
"loss": 1.2164, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.8512173128944995, |
|
"grad_norm": 1.289678632558847, |
|
"learning_rate": 5.701109142194422e-07, |
|
"loss": 1.1922, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.8521190261496844, |
|
"grad_norm": 1.3321126371269403, |
|
"learning_rate": 5.633538252575677e-07, |
|
"loss": 1.1958, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.8530207394048692, |
|
"grad_norm": 1.2720222221403463, |
|
"learning_rate": 5.566346274034895e-07, |
|
"loss": 1.2272, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.8539224526600541, |
|
"grad_norm": 1.23704892854696, |
|
"learning_rate": 5.499533780424138e-07, |
|
"loss": 1.2108, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.8548241659152389, |
|
"grad_norm": 1.2541502216827884, |
|
"learning_rate": 5.433101342354474e-07, |
|
"loss": 1.2108, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.8557258791704238, |
|
"grad_norm": 1.2885605008569092, |
|
"learning_rate": 5.367049527191093e-07, |
|
"loss": 1.2257, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.8566275924256087, |
|
"grad_norm": 1.2927700166567266, |
|
"learning_rate": 5.301378899048514e-07, |
|
"loss": 1.2112, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8575293056807936, |
|
"grad_norm": 1.2839929669287422, |
|
"learning_rate": 5.236090018785705e-07, |
|
"loss": 1.2026, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.8584310189359784, |
|
"grad_norm": 1.3136146492814051, |
|
"learning_rate": 5.171183444001337e-07, |
|
"loss": 1.2331, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.8593327321911632, |
|
"grad_norm": 1.3001428661698673, |
|
"learning_rate": 5.106659729029007e-07, |
|
"loss": 1.1918, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.8602344454463481, |
|
"grad_norm": 1.2839843931089516, |
|
"learning_rate": 5.042519424932512e-07, |
|
"loss": 1.2202, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.8611361587015329, |
|
"grad_norm": 1.270892973171116, |
|
"learning_rate": 4.978763079501109e-07, |
|
"loss": 1.2201, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.8620378719567178, |
|
"grad_norm": 1.2916321550437255, |
|
"learning_rate": 4.915391237244876e-07, |
|
"loss": 1.2364, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.8629395852119026, |
|
"grad_norm": 1.3047521772223711, |
|
"learning_rate": 4.852404439390051e-07, |
|
"loss": 1.2193, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.8638412984670875, |
|
"grad_norm": 1.3099220594258418, |
|
"learning_rate": 4.789803223874423e-07, |
|
"loss": 1.2021, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.8647430117222723, |
|
"grad_norm": 1.3059600685193462, |
|
"learning_rate": 4.727588125342669e-07, |
|
"loss": 1.2213, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.8656447249774571, |
|
"grad_norm": 1.3207903999542119, |
|
"learning_rate": 4.665759675141901e-07, |
|
"loss": 1.2244, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.866546438232642, |
|
"grad_norm": 1.288523708061658, |
|
"learning_rate": 4.604318401317009e-07, |
|
"loss": 1.2316, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.8674481514878268, |
|
"grad_norm": 1.2572262151165579, |
|
"learning_rate": 4.543264828606264e-07, |
|
"loss": 1.2207, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.8683498647430117, |
|
"grad_norm": 1.2815957832640745, |
|
"learning_rate": 4.48259947843675e-07, |
|
"loss": 1.201, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.8692515779981965, |
|
"grad_norm": 1.2426149475176893, |
|
"learning_rate": 4.422322868919937e-07, |
|
"loss": 1.174, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.8701532912533815, |
|
"grad_norm": 1.2917796970754292, |
|
"learning_rate": 4.3624355148472796e-07, |
|
"loss": 1.2154, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.8710550045085663, |
|
"grad_norm": 1.2804388471447807, |
|
"learning_rate": 4.302937927685802e-07, |
|
"loss": 1.1898, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.8719567177637512, |
|
"grad_norm": 1.2965540391390407, |
|
"learning_rate": 4.2438306155737243e-07, |
|
"loss": 1.2193, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.872858431018936, |
|
"grad_norm": 1.3095171070129454, |
|
"learning_rate": 4.1851140833161163e-07, |
|
"loss": 1.2035, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.8737601442741209, |
|
"grad_norm": 1.3406437797884603, |
|
"learning_rate": 4.1267888323806294e-07, |
|
"loss": 1.2361, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.8746618575293057, |
|
"grad_norm": 1.2968176832526788, |
|
"learning_rate": 4.0688553608931313e-07, |
|
"loss": 1.2081, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.8755635707844905, |
|
"grad_norm": 1.2398826667404843, |
|
"learning_rate": 4.011314163633573e-07, |
|
"loss": 1.19, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.8764652840396754, |
|
"grad_norm": 1.277753523037616, |
|
"learning_rate": 3.954165732031634e-07, |
|
"loss": 1.1806, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.8773669972948602, |
|
"grad_norm": 1.2786206929414288, |
|
"learning_rate": 3.897410554162623e-07, |
|
"loss": 1.2338, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.8782687105500451, |
|
"grad_norm": 1.291914303763212, |
|
"learning_rate": 3.841049114743239e-07, |
|
"loss": 1.2323, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.8791704238052299, |
|
"grad_norm": 1.2796108765905125, |
|
"learning_rate": 3.7850818951274903e-07, |
|
"loss": 1.2232, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.8800721370604148, |
|
"grad_norm": 1.259816182886979, |
|
"learning_rate": 3.729509373302548e-07, |
|
"loss": 1.1889, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.8809738503155996, |
|
"grad_norm": 1.2792451995025527, |
|
"learning_rate": 3.674332023884664e-07, |
|
"loss": 1.2116, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.8818755635707844, |
|
"grad_norm": 1.274657278905521, |
|
"learning_rate": 3.619550318115145e-07, |
|
"loss": 1.235, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.8827772768259693, |
|
"grad_norm": 1.257454729532636, |
|
"learning_rate": 3.5651647238562904e-07, |
|
"loss": 1.2106, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.8836789900811542, |
|
"grad_norm": 1.252377545649136, |
|
"learning_rate": 3.511175705587433e-07, |
|
"loss": 1.2043, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.8845807033363391, |
|
"grad_norm": 1.2521024978130257, |
|
"learning_rate": 3.4575837244009367e-07, |
|
"loss": 1.1983, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.8854824165915239, |
|
"grad_norm": 1.2923042352880336, |
|
"learning_rate": 3.4043892379982956e-07, |
|
"loss": 1.2339, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.8863841298467088, |
|
"grad_norm": 1.226700149978839, |
|
"learning_rate": 3.351592700686168e-07, |
|
"loss": 1.2028, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.8872858431018936, |
|
"grad_norm": 1.2665798312269472, |
|
"learning_rate": 3.299194563372604e-07, |
|
"loss": 1.2185, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.8881875563570785, |
|
"grad_norm": 1.253667104013189, |
|
"learning_rate": 3.247195273563047e-07, |
|
"loss": 1.191, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.8890892696122633, |
|
"grad_norm": 1.2429872892333635, |
|
"learning_rate": 3.1955952753566445e-07, |
|
"loss": 1.209, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.8899909828674482, |
|
"grad_norm": 1.325045978009465, |
|
"learning_rate": 3.144395009442369e-07, |
|
"loss": 1.224, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.890892696122633, |
|
"grad_norm": 1.329496806355262, |
|
"learning_rate": 3.093594913095299e-07, |
|
"loss": 1.211, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.8917944093778178, |
|
"grad_norm": 1.2305655303732355, |
|
"learning_rate": 3.043195420172879e-07, |
|
"loss": 1.2036, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.8926961226330027, |
|
"grad_norm": 1.2574131675915197, |
|
"learning_rate": 2.9931969611111777e-07, |
|
"loss": 1.2032, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8935978358881875, |
|
"grad_norm": 1.2662488779050474, |
|
"learning_rate": 2.943599962921279e-07, |
|
"loss": 1.2251, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.8944995491433724, |
|
"grad_norm": 1.330940318105111, |
|
"learning_rate": 2.89440484918555e-07, |
|
"loss": 1.2036, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.8954012623985572, |
|
"grad_norm": 1.2724811054244518, |
|
"learning_rate": 2.84561204005413e-07, |
|
"loss": 1.2275, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.8963029756537421, |
|
"grad_norm": 1.2428562642394823, |
|
"learning_rate": 2.7972219522412194e-07, |
|
"loss": 1.2087, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.8972046889089269, |
|
"grad_norm": 1.2550505429558814, |
|
"learning_rate": 2.7492349990216327e-07, |
|
"loss": 1.1932, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.8981064021641119, |
|
"grad_norm": 1.2889360575719786, |
|
"learning_rate": 2.701651590227178e-07, |
|
"loss": 1.2001, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.8990081154192967, |
|
"grad_norm": 1.3377452346886367, |
|
"learning_rate": 2.654472132243241e-07, |
|
"loss": 1.2136, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.8999098286744815, |
|
"grad_norm": 1.2974101369208686, |
|
"learning_rate": 2.6076970280052295e-07, |
|
"loss": 1.199, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.9008115419296664, |
|
"grad_norm": 1.2734787152019011, |
|
"learning_rate": 2.5613266769952183e-07, |
|
"loss": 1.2127, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.9017132551848512, |
|
"grad_norm": 1.2985739168739143, |
|
"learning_rate": 2.5153614752384534e-07, |
|
"loss": 1.1983, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9026149684400361, |
|
"grad_norm": 1.2573502235072782, |
|
"learning_rate": 2.469801815300027e-07, |
|
"loss": 1.2135, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.9035166816952209, |
|
"grad_norm": 1.2748461049930755, |
|
"learning_rate": 2.4246480862815226e-07, |
|
"loss": 1.2245, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.9044183949504058, |
|
"grad_norm": 1.2884368599511475, |
|
"learning_rate": 2.3799006738176422e-07, |
|
"loss": 1.2142, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.9053201082055906, |
|
"grad_norm": 1.2743626671513446, |
|
"learning_rate": 2.3355599600729916e-07, |
|
"loss": 1.2163, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.9062218214607755, |
|
"grad_norm": 1.313060889818672, |
|
"learning_rate": 2.2916263237387104e-07, |
|
"loss": 1.2059, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.9071235347159603, |
|
"grad_norm": 1.2833792613487849, |
|
"learning_rate": 2.2481001400293855e-07, |
|
"loss": 1.2131, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.9080252479711451, |
|
"grad_norm": 1.3170354684360084, |
|
"learning_rate": 2.204981780679677e-07, |
|
"loss": 1.2095, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.90892696122633, |
|
"grad_norm": 1.3165273872030707, |
|
"learning_rate": 2.1622716139412803e-07, |
|
"loss": 1.2189, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.9098286744815148, |
|
"grad_norm": 1.2612221063278017, |
|
"learning_rate": 2.1199700045797077e-07, |
|
"loss": 1.1943, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.9107303877366997, |
|
"grad_norm": 1.254731155862502, |
|
"learning_rate": 2.0780773138711908e-07, |
|
"loss": 1.2084, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.9116321009918846, |
|
"grad_norm": 1.2638790136002085, |
|
"learning_rate": 2.036593899599615e-07, |
|
"loss": 1.195, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.9125338142470695, |
|
"grad_norm": 1.2774446957229728, |
|
"learning_rate": 1.9955201160534342e-07, |
|
"loss": 1.2388, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.9134355275022543, |
|
"grad_norm": 1.2814569340864863, |
|
"learning_rate": 1.9548563140226518e-07, |
|
"loss": 1.212, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.9143372407574392, |
|
"grad_norm": 1.265465404596977, |
|
"learning_rate": 1.9146028407958483e-07, |
|
"loss": 1.2067, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.915238954012624, |
|
"grad_norm": 1.269985400786495, |
|
"learning_rate": 1.874760040157181e-07, |
|
"loss": 1.2273, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.9161406672678089, |
|
"grad_norm": 1.321443696897108, |
|
"learning_rate": 1.8353282523834671e-07, |
|
"loss": 1.2235, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.9170423805229937, |
|
"grad_norm": 1.261142508109314, |
|
"learning_rate": 1.7963078142412883e-07, |
|
"loss": 1.203, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.9179440937781785, |
|
"grad_norm": 1.27184781211763, |
|
"learning_rate": 1.7576990589840747e-07, |
|
"loss": 1.2091, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.9188458070333634, |
|
"grad_norm": 1.2921875252777293, |
|
"learning_rate": 1.7195023163493253e-07, |
|
"loss": 1.2069, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.9197475202885482, |
|
"grad_norm": 1.2942084641451423, |
|
"learning_rate": 1.6817179125557026e-07, |
|
"loss": 1.2291, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.9206492335437331, |
|
"grad_norm": 1.2439106153543784, |
|
"learning_rate": 1.6443461703003427e-07, |
|
"loss": 1.2141, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.9215509467989179, |
|
"grad_norm": 1.2883747080006507, |
|
"learning_rate": 1.6073874087560115e-07, |
|
"loss": 1.2058, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.9224526600541028, |
|
"grad_norm": 1.237116734838997, |
|
"learning_rate": 1.5708419435684463e-07, |
|
"loss": 1.2, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.9233543733092876, |
|
"grad_norm": 1.2705941080800744, |
|
"learning_rate": 1.5347100868536246e-07, |
|
"loss": 1.1878, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.9242560865644724, |
|
"grad_norm": 1.289668475702933, |
|
"learning_rate": 1.4989921471951163e-07, |
|
"loss": 1.2059, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.9251577998196574, |
|
"grad_norm": 1.252243810732813, |
|
"learning_rate": 1.4636884296414133e-07, |
|
"loss": 1.1894, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.9260595130748422, |
|
"grad_norm": 1.239862577289118, |
|
"learning_rate": 1.428799235703382e-07, |
|
"loss": 1.2062, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.9269612263300271, |
|
"grad_norm": 1.2792086558499869, |
|
"learning_rate": 1.3943248633516426e-07, |
|
"loss": 1.2289, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.9278629395852119, |
|
"grad_norm": 1.2585322782352686, |
|
"learning_rate": 1.3602656070140275e-07, |
|
"loss": 1.2187, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.9287646528403968, |
|
"grad_norm": 1.2649978962250352, |
|
"learning_rate": 1.3266217575730934e-07, |
|
"loss": 1.2335, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.9296663660955816, |
|
"grad_norm": 1.2709565276249286, |
|
"learning_rate": 1.2933936023636073e-07, |
|
"loss": 1.2253, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.9305680793507665, |
|
"grad_norm": 1.2741243155450708, |
|
"learning_rate": 1.2605814251701154e-07, |
|
"loss": 1.2155, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.9314697926059513, |
|
"grad_norm": 1.2962554005547353, |
|
"learning_rate": 1.2281855062245163e-07, |
|
"loss": 1.2323, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.9323715058611362, |
|
"grad_norm": 1.222038672850551, |
|
"learning_rate": 1.196206122203647e-07, |
|
"loss": 1.2294, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.933273219116321, |
|
"grad_norm": 1.273022165563276, |
|
"learning_rate": 1.1646435462269346e-07, |
|
"loss": 1.209, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.9341749323715058, |
|
"grad_norm": 1.317712793510341, |
|
"learning_rate": 1.1334980478540758e-07, |
|
"loss": 1.2239, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.9350766456266907, |
|
"grad_norm": 1.29768734193814, |
|
"learning_rate": 1.1027698930827169e-07, |
|
"loss": 1.2089, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.9359783588818755, |
|
"grad_norm": 1.2748192341276068, |
|
"learning_rate": 1.0724593443461883e-07, |
|
"loss": 1.2161, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.9368800721370604, |
|
"grad_norm": 1.3078946894653718, |
|
"learning_rate": 1.0425666605112516e-07, |
|
"loss": 1.2134, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.9377817853922452, |
|
"grad_norm": 1.2943299119489553, |
|
"learning_rate": 1.0130920968759228e-07, |
|
"loss": 1.2191, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9386834986474302, |
|
"grad_norm": 1.2524693445516681, |
|
"learning_rate": 9.84035905167241e-08, |
|
"loss": 1.2023, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.939585211902615, |
|
"grad_norm": 1.2254001564356944, |
|
"learning_rate": 9.553983335391647e-08, |
|
"loss": 1.191, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.9404869251577999, |
|
"grad_norm": 1.283320757635457, |
|
"learning_rate": 9.271796265704403e-08, |
|
"loss": 1.2217, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.9413886384129847, |
|
"grad_norm": 1.3413375530222929, |
|
"learning_rate": 8.993800252624863e-08, |
|
"loss": 1.2107, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.9422903516681695, |
|
"grad_norm": 1.2395249209659651, |
|
"learning_rate": 8.719997670373682e-08, |
|
"loss": 1.2085, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.9431920649233544, |
|
"grad_norm": 1.3019154798956944, |
|
"learning_rate": 8.450390857357549e-08, |
|
"loss": 1.2187, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.9440937781785392, |
|
"grad_norm": 1.2742821693649515, |
|
"learning_rate": 8.18498211614932e-08, |
|
"loss": 1.2058, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.9449954914337241, |
|
"grad_norm": 1.3231134618421645, |
|
"learning_rate": 7.923773713468197e-08, |
|
"loss": 1.2127, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.9458972046889089, |
|
"grad_norm": 1.277096810367392, |
|
"learning_rate": 7.666767880160464e-08, |
|
"loss": 1.2289, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.9467989179440938, |
|
"grad_norm": 1.2665767697644446, |
|
"learning_rate": 7.413966811180451e-08, |
|
"loss": 1.2099, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9477006311992786, |
|
"grad_norm": 1.3063066976618636, |
|
"learning_rate": 7.165372665571879e-08, |
|
"loss": 1.2369, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.9486023444544635, |
|
"grad_norm": 1.2888722722107182, |
|
"learning_rate": 6.920987566448989e-08, |
|
"loss": 1.1898, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.9495040577096483, |
|
"grad_norm": 1.2529437153897622, |
|
"learning_rate": 6.680813600979164e-08, |
|
"loss": 1.1879, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.9504057709648331, |
|
"grad_norm": 1.2660744074547863, |
|
"learning_rate": 6.444852820364222e-08, |
|
"loss": 1.2249, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.951307484220018, |
|
"grad_norm": 1.2731308456724335, |
|
"learning_rate": 6.213107239823602e-08, |
|
"loss": 1.1905, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.9522091974752029, |
|
"grad_norm": 1.2847581977653115, |
|
"learning_rate": 5.985578838576978e-08, |
|
"loss": 1.21, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.9531109107303878, |
|
"grad_norm": 1.2858831339292007, |
|
"learning_rate": 5.762269559826894e-08, |
|
"loss": 1.2408, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.9540126239855726, |
|
"grad_norm": 1.2835136436122037, |
|
"learning_rate": 5.54318131074294e-08, |
|
"loss": 1.2223, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.9549143372407575, |
|
"grad_norm": 1.2443952821022994, |
|
"learning_rate": 5.3283159624448745e-08, |
|
"loss": 1.1958, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.9558160504959423, |
|
"grad_norm": 1.264268819104271, |
|
"learning_rate": 5.117675349986917e-08, |
|
"loss": 1.1901, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.9567177637511272, |
|
"grad_norm": 1.2956493028514946, |
|
"learning_rate": 4.911261272341872e-08, |
|
"loss": 1.197, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.957619477006312, |
|
"grad_norm": 1.269520271669753, |
|
"learning_rate": 4.7090754923859725e-08, |
|
"loss": 1.1895, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.9585211902614968, |
|
"grad_norm": 1.2979719372812786, |
|
"learning_rate": 4.511119736883729e-08, |
|
"loss": 1.2298, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.9594229035166817, |
|
"grad_norm": 1.288532218908057, |
|
"learning_rate": 4.3173956964732145e-08, |
|
"loss": 1.2037, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.9603246167718665, |
|
"grad_norm": 1.2797908398125752, |
|
"learning_rate": 4.127905025651635e-08, |
|
"loss": 1.241, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.9612263300270514, |
|
"grad_norm": 1.3040381654756092, |
|
"learning_rate": 3.9426493427611177e-08, |
|
"loss": 1.1916, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.9621280432822362, |
|
"grad_norm": 1.2808361333680878, |
|
"learning_rate": 3.761630229974833e-08, |
|
"loss": 1.2143, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.9630297565374211, |
|
"grad_norm": 1.2950532754186528, |
|
"learning_rate": 3.584849233283838e-08, |
|
"loss": 1.2227, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.9639314697926059, |
|
"grad_norm": 1.2550421927753321, |
|
"learning_rate": 3.4123078624834214e-08, |
|
"loss": 1.2139, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.9648331830477908, |
|
"grad_norm": 1.2701351461973567, |
|
"learning_rate": 3.244007591160503e-08, |
|
"loss": 1.2109, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.9657348963029756, |
|
"grad_norm": 1.2571051407543998, |
|
"learning_rate": 3.079949856680975e-08, |
|
"loss": 1.2068, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.9666366095581606, |
|
"grad_norm": 1.2481617283700457, |
|
"learning_rate": 2.9201360601772698e-08, |
|
"loss": 1.2402, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.9675383228133454, |
|
"grad_norm": 1.237772880329741, |
|
"learning_rate": 2.7645675665367578e-08, |
|
"loss": 1.2181, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.9684400360685302, |
|
"grad_norm": 1.2782845837706895, |
|
"learning_rate": 2.6132457043896442e-08, |
|
"loss": 1.1945, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.9693417493237151, |
|
"grad_norm": 1.289759744788445, |
|
"learning_rate": 2.4661717660980356e-08, |
|
"loss": 1.2033, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.9702434625788999, |
|
"grad_norm": 1.311942673721432, |
|
"learning_rate": 2.323347007744503e-08, |
|
"loss": 1.2219, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.9711451758340848, |
|
"grad_norm": 1.2727763297469359, |
|
"learning_rate": 2.184772649121758e-08, |
|
"loss": 1.1994, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.9720468890892696, |
|
"grad_norm": 1.2557611757173046, |
|
"learning_rate": 2.0504498737219936e-08, |
|
"loss": 1.1953, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.9729486023444545, |
|
"grad_norm": 1.2781594119302995, |
|
"learning_rate": 1.920379828726726e-08, |
|
"loss": 1.1996, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.9738503155996393, |
|
"grad_norm": 1.3054939219702595, |
|
"learning_rate": 1.7945636249971364e-08, |
|
"loss": 1.2329, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.9747520288548241, |
|
"grad_norm": 1.310407071986216, |
|
"learning_rate": 1.6730023370645775e-08, |
|
"loss": 1.2282, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.975653742110009, |
|
"grad_norm": 1.2478522708231785, |
|
"learning_rate": 1.5556970031214145e-08, |
|
"loss": 1.2347, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.9765554553651938, |
|
"grad_norm": 1.272238571063739, |
|
"learning_rate": 1.4426486250119776e-08, |
|
"loss": 1.2115, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.9774571686203787, |
|
"grad_norm": 1.2597189160490305, |
|
"learning_rate": 1.333858168224178e-08, |
|
"loss": 1.1915, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.9783588818755635, |
|
"grad_norm": 1.2574926511170699, |
|
"learning_rate": 1.2293265618811834e-08, |
|
"loss": 1.2163, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.9792605951307484, |
|
"grad_norm": 1.2527364255680953, |
|
"learning_rate": 1.1290546987336448e-08, |
|
"loss": 1.2086, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.9801623083859333, |
|
"grad_norm": 1.27362762517652, |
|
"learning_rate": 1.0330434351518149e-08, |
|
"loss": 1.1843, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.9810640216411182, |
|
"grad_norm": 1.2973034143378401, |
|
"learning_rate": 9.412935911183863e-09, |
|
"loss": 1.1956, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.981965734896303, |
|
"grad_norm": 1.3018989918977304, |
|
"learning_rate": 8.538059502214979e-09, |
|
"loss": 1.2319, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.9828674481514879, |
|
"grad_norm": 1.2479095491543981, |
|
"learning_rate": 7.705812596479623e-09, |
|
"loss": 1.2188, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.9837691614066727, |
|
"grad_norm": 1.3272869423642981, |
|
"learning_rate": 6.9162023017699255e-09, |
|
"loss": 1.2234, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.9846708746618575, |
|
"grad_norm": 1.2488700072518422, |
|
"learning_rate": 6.169235361739856e-09, |
|
"loss": 1.1952, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.9855725879170424, |
|
"grad_norm": 1.2700332989633605, |
|
"learning_rate": 5.464918155849708e-09, |
|
"loss": 1.2117, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.9864743011722272, |
|
"grad_norm": 1.2768089943898375, |
|
"learning_rate": 4.803256699308923e-09, |
|
"loss": 1.1925, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.9873760144274121, |
|
"grad_norm": 1.2755150189086724, |
|
"learning_rate": 4.18425664302724e-09, |
|
"loss": 1.2056, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.9882777276825969, |
|
"grad_norm": 1.2451176059668059, |
|
"learning_rate": 3.6079232735647398e-09, |
|
"loss": 1.1898, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.9891794409377818, |
|
"grad_norm": 1.2758758036274904, |
|
"learning_rate": 3.074261513087984e-09, |
|
"loss": 1.2147, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.9900811541929666, |
|
"grad_norm": 1.2630686054047537, |
|
"learning_rate": 2.583275919327277e-09, |
|
"loss": 1.2157, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.9909828674481514, |
|
"grad_norm": 1.291774655594905, |
|
"learning_rate": 2.134970685536697e-09, |
|
"loss": 1.232, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.9918845807033363, |
|
"grad_norm": 1.2811485111546765, |
|
"learning_rate": 1.7293496404602316e-09, |
|
"loss": 1.2154, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9927862939585211, |
|
"grad_norm": 1.292187830956411, |
|
"learning_rate": 1.3664162482990296e-09, |
|
"loss": 1.1845, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.9936880072137061, |
|
"grad_norm": 1.2505142052902283, |
|
"learning_rate": 1.0461736086786467e-09, |
|
"loss": 1.2096, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.9945897204688909, |
|
"grad_norm": 1.2457145139334065, |
|
"learning_rate": 7.686244566273981e-10, |
|
"loss": 1.1985, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.9954914337240758, |
|
"grad_norm": 1.250521582131511, |
|
"learning_rate": 5.337711625497122e-10, |
|
"loss": 1.2144, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.9963931469792606, |
|
"grad_norm": 1.3207469421718456, |
|
"learning_rate": 3.416157322055913e-10, |
|
"loss": 1.2173, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.9972948602344455, |
|
"grad_norm": 1.305287692170028, |
|
"learning_rate": 1.921598066961794e-10, |
|
"loss": 1.1969, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.9981965734896303, |
|
"grad_norm": 1.281374648115917, |
|
"learning_rate": 8.540466244710832e-11, |
|
"loss": 1.2162, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.9990982867448152, |
|
"grad_norm": 1.2559201459727685, |
|
"learning_rate": 2.1351211199061028e-11, |
|
"loss": 1.2056, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.2684289549241197, |
|
"learning_rate": 0.0, |
|
"loss": 1.2128, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1109, |
|
"total_flos": 1474711721345024.0, |
|
"train_loss": 1.2891367367520001, |
|
"train_runtime": 18995.5344, |
|
"train_samples_per_second": 0.467, |
|
"train_steps_per_second": 0.058 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1109, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1474711721345024.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |